# ---- Run parameters ---------------------------------------------------------
# Unpack the knitr/Rmd `params` list into top-level variables used throughout
# the script; two switches are hard-wired for this run.
output.var          <- params$output.var   # response column name (e.g. "y3")
transform.abs       <- FALSE               # absolute-value transform: disabled
log.pred            <- params$log.pred     # log10-transform the response?
norm.pred           <- FALSE               # bestNormalize transform: disabled
eda                 <- params$eda          # run the exploratory (EDA) chunks?
algo.forward.caret  <- params$algo.forward.caret
algo.backward.caret <- params$algo.backward.caret
algo.stepwise.caret <- params$algo.stepwise.caret
algo.LASSO.caret    <- params$algo.LASSO.caret
algo.LARS.caret     <- params$algo.LARS.caret
message("Parameters used for training/prediction: ")
## Parameters used for training/prediction:
str(params)
## List of 8
## $ output.var : chr "y3"
## $ log.pred : logi TRUE
## $ eda : logi FALSE
## $ algo.forward.caret : logi TRUE
## $ algo.backward.caret: logi TRUE
## $ algo.stepwise.caret: logi TRUE
## $ algo.LASSO.caret : logi TRUE
## $ algo.LARS.caret : logi TRUE
# Setup Labels
# Name of the modelling response: "<output.var>.log" when predicting on the
# log10 scale, otherwise the raw response name.
# BUGFIX: the original wrote `... else output.var.tr = output.var`, i.e. an
# assignment nested inside the else arm (assigning twice). Same end result,
# but confusing and fragile — the if-expression's value is all that is needed.
output.var.tr = if (log.pred == TRUE) paste0(output.var,'.log') else output.var
# ---- Load data: features + labels, join on JobName, keep complete cases ----
feat = read.csv('../../Data/features_highprec.csv')
labels = read.csv('../../Data/labels.csv')
# Candidate predictors = every feature column except the JobName key.
predictors = names(dplyr::select(feat,-JobName))
data.ori = inner_join(feat,labels,by='JobName')
#data.ori = inner_join(feat,select_at(labels,c('JobName',output.var)),by='JobName')
# Drop rows with any missing value; keep predictors + response + key only.
cc = complete.cases(data.ori)
data.notComplete = data.ori[! cc,]
data = data.ori[cc,] %>% select_at(c(predictors,output.var,'JobName'))
message('Original cases: ',nrow(data.ori))
## Original cases: 10000
message('Non-Complete cases: ',nrow(data.notComplete))
## Non-Complete cases: 3020
message('Complete cases: ',nrow(data))
## Complete cases: 6980
summary(dplyr::select_at(data,c('JobName',output.var)))
## JobName y3
## Job_00001: 1 Min. : 95.91
## Job_00002: 1 1st Qu.:118.29
## Job_00003: 1 Median :124.03
## Job_00004: 1 Mean :125.40
## Job_00007: 1 3rd Qu.:131.06
## Job_00008: 1 Max. :193.73
## (Other) :6974
The output variable y3 shows right skewness, so we will proceed with a log transformation.
# Histogram + density of the raw response, to assess skewness.
df=gather(select_at(data,output.var))
ggplot(df, aes(x=value)) +
geom_histogram(aes(y=..density..),bins = 50,fill='light blue') +
geom_density()
#stat_function(fun = dnorm, n = 100, args = list(mean = mean(df$value), sd = sd(df$value)))
# Normal QQ plot of the raw response.
ggplot(gather(select_at(data,output.var)), aes(sample=value)) +
stat_qq() +
facet_wrap(~key, scales = 'free',ncol=4)
# Create the modelling response: log10-transform when log.pred is TRUE,
# otherwise copy the raw response under the transformed name.
if(log.pred==TRUE) data[[output.var.tr]] = log(data[[output.var]],10) else
data[[output.var.tr]] = data[[output.var]]
# Side-by-side histograms and QQ plots of the raw vs transformed response.
df=gather(select_at(data,c(output.var,output.var.tr)))
ggplot(df, aes(value)) +
geom_histogram(aes(y=..density..),bins = 50,fill='light blue') +
geom_density() +
# stat_function(fun = dnorm, n = 100, args = list(mean = mean(df$value), sd = sd(df$value)))
facet_wrap(~key, scales = 'free',ncol=2)
ggplot(gather(select_at(data,c(output.var,output.var.tr))), aes(sample=value)) +
stat_qq() +
facet_wrap(~key, scales = 'free',ncol=4)
Normalization of y3 using the bestNormalize package (which suggests orderNorm). This is interesting, but I think it goes beyond the objective of the project.
# Let bestNormalize pick the best normalizing transformation via CV.
# Exploratory only — modelling uses the log10 transform (norm.pred = FALSE).
t=bestNormalize::bestNormalize(data[[output.var]])
t
## Best Normalizing transformation with 6980 Observations
## Estimated Normality Statistics (Pearson P / df, lower => more normal):
## - No transform: 2.8839
## - Box-Cox: 1.3694
## - Log_b(x+a): 1.9466
## - sqrt(x+a): 2.3519
## - exp(x): 749.0131
## - arcsinh(x): 1.9466
## - Yeo-Johnson: 1.1427
## - orderNorm: 1.1189
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##
## Based off these, bestNormalize chose:
## orderNorm Transformation with 6980 nonmissing obs and no ties
## - Original quantiles:
## 0% 25% 50% 75% 100%
## 95.913 118.289 124.030 131.059 193.726
# QQ plots: raw response vs the chosen (orderNorm) transformed values.
qqnorm(data[[output.var]])
qqnorm(predict(t))
orderNorm() is a rank-based procedure by which the values of a vector are mapped to their percentile, which is then mapped to the same percentile of the normal distribution. Without the presence of ties, this essentially guarantees that the transformation leads to a normal distribution.
All predictors show a Fat-Tail situation, where the two tails are very tall, and a low distribution around the mean. The orderNorm transformation can help (see [Best Normalizator] section)
Histograms
# EDA: histograms for a hand-picked sample of predictors.
# NOTE: `cols` defined here is reused by the next EDA chunk.
if (eda == TRUE){
cols = c('x11','x18','stat98','x7','stat110')
df=gather(select_at(data,cols))
# BUGFIX: wrapped in print() — a ggplot object that is not the last expression
# of a top-level if-block is never auto-printed, so this chart was silently
# dropped whenever eda = TRUE.
print(ggplot(df, aes(value)) +
geom_histogram(aes(y=..density..),bins = 50,fill='light blue') +
geom_density() +
# stat_function(fun = dnorm, n = 100, args = list(mean = mean(df$value), sd = sd(df$value)))
facet_wrap(~key, scales = 'free',ncol=3))
# ggplot(gather(select_at(data,cols)), aes(sample=value)) +
# stat_qq()+
# facet_wrap(~key, scales = 'free',ncol=2)
# Last expression of the if-block: auto-printed at top level.
lapply(select_at(data,cols),summary)
}
Scatter plot vs. output variable **y3.log**
# EDA: scatter plots of the sampled predictors vs the transformed response.
# NOTE(review): relies on `cols` defined in the previous EDA chunk — confirm
# chunk order is preserved.
if (eda == TRUE){
d = gather(dplyr::select_at(data,c(cols,output.var.tr)),key=target,value=value,-!!output.var.tr)
# Last expression of the if-block, so it is auto-printed at top level.
ggplot(data=d, aes_string(x='value',y=output.var.tr)) +
geom_point(color='light green',alpha=0.5) +
geom_smooth() +
facet_wrap(~target, scales = 'free',ncol=3)
}
All predictors show strong indications of fat tails.
# EDA: histograms of every predictor (faceted); heavy, so gated behind eda.
if (eda == TRUE){
df=gather(select_at(data,predictors))
# Last expression of the if-block, so it is auto-printed at top level.
ggplot(df, aes(value)) +
geom_histogram(aes(y=..density..),bins = 50,fill='light blue') +
geom_density() +
# stat_function(fun = dnorm, n = 100, args = list(mean = mean(df$value), sd = sd(df$value)))
facet_wrap(~key, scales = 'free',ncol=4)
}
# EDA: correlation of each variable with the transformed response.
if (eda == TRUE){
#chart.Correlation(select(data,-JobName), pch=21)
# NOTE(review): arrange(-y3.log) hard-codes the response column name; this
# only matches output.var.tr when output.var = 'y3' and log.pred = TRUE —
# confirm before changing the target variable.
t=as.data.frame(round(cor(dplyr::select(data,-one_of(output.var.tr,'JobName'))
,select_at(data,output.var.tr)),4)) %>%
rownames_to_column(var='variable') %>% filter(variable != !!output.var) %>% arrange(-y3.log)
#DT::datatable(t)
message("Top Positive")
# BUGFIX: wrapped in print() — this kable() was not the last expression of the
# if-block, so its output was silently dropped whenever eda = TRUE.
print(kable(head(arrange(t,desc(y3.log)),20)))
message("Top Negative")
print(kable(head(arrange(t,y3.log),20)))
}
# EDA: full predictor-predictor correlation matrix (first 10x10 shown).
if (eda == TRUE){
#chart.Correlation(select(data,-JobName), pch=21)
t=as.data.frame(round(cor(dplyr::select(data,-one_of('JobName'))),4))
#DT::datatable(t,options=list(scrollX=T))
message("Showing only 10 variables")
# Last expression of the if-block, so it is auto-printed at top level.
kable(t[1:10,1:10])
}
Scatter plots with all predictors and the output variable (y3.log)
# EDA: scatter plots of all predictors vs the transformed response.
if (eda == TRUE){
d = gather(dplyr::select_at(data,c(predictors,output.var.tr)),key=target,value=value,-!!output.var.tr)
# Last expression of the if-block, so it is auto-printed at top level.
ggplot(data=d, aes_string(x='value',y=output.var.tr)) +
geom_point(color='light blue',alpha=0.5) +
geom_smooth() +
facet_wrap(~target, scales = 'free',ncol=4)
}
No Multicollinearity among predictors
Showing Top predictor by VIF Value
# EDA: Variance Inflation Factors — screen predictors for multicollinearity.
if (eda == TRUE){
vifDF = usdm::vif(select_at(data,predictors)) %>% arrange(desc(VIF))
# Top 15 by VIF; last expression, auto-printed at top level.
head(vifDF,15)
}
# ---- Feature transformation ----
# Add sqrt(x18) (reduces skewness), then drop the raw response, raw x18 and
# the JobName key from the modelling frame.
data.tr=data %>%
mutate(x18.sqrt = sqrt(x18))
cols=c('x18','x18.sqrt')
# ggplot(gather(select_at(data.tr,cols)), aes(value)) +
# geom_histogram(aes(y=..density..),bins = 50,fill='light blue') +
# geom_density() +
# facet_wrap(~key, scales = 'free',ncol=4)
# Visual check: x18 vs x18.sqrt against the transformed response.
d = gather(dplyr::select_at(data.tr,c(cols,output.var.tr)),key=target,value=value,-!!output.var.tr)
ggplot(data=d, aes_string(x='value',y=output.var.tr)) +
geom_point(color='light blue',alpha=0.5) +
geom_smooth() +
facet_wrap(~target, scales = 'free',ncol=4)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
#removing unwanted variables
# NOTE(review): 'y3' is hard-coded here although the response name comes from
# output.var elsewhere — confirm if the target variable ever changes.
data.tr=data.tr %>%
dplyr::select_at(names(data.tr)[! names(data.tr) %in% c('x18','y3','JobName')])
data=data.tr
label.names=output.var.tr
InteractionMode = 2 # 0 for no interaction, 1 for Full 2 way interaction and 2 for Intelligent Interaction
# PCA input variables: everything in the modelling frame except the response.
pca.vars = names(data)
pca.vars = pca.vars[!pca.vars %in% label.names]
# ---- PCA, optionally on interaction-expanded predictors ----
if(InteractionMode == 1){
# Full two-way interactions among all PCA variables.
pca.formula =as.formula(paste0('~(',paste0(pca.vars, collapse ='+'),')^2'))
pca.model = prcomp(formula=pca.formula,data=data[,pca.vars],center=T,scale.=T,retx = T)
#saveRDS(pca.model,'pca.model.rds')
}
if (InteractionMode == 0){
# No interactions: PCA on the raw (centered/scaled) variables.
pca.model = prcomp(x=data[,pca.vars],center=T,scale.=T,retx = T)
}
if (InteractionMode == 2){
# "Intelligent" interactions: two-way terms only among the controlled
# variables (names starting with "x"); the "stat" variables enter as-is.
controlled.vars = pca.vars[grep("^x",pca.vars)]
stat.vars = pca.vars[grep("^stat",pca.vars)]
two.way = paste0('~(',paste0(controlled.vars, collapse ='+'),')^2')
no.interact = paste0(stat.vars, collapse ='+')
pca.formula = as.formula(paste(two.way, no.interact, sep = "+"))
pca.model = prcomp(formula=pca.formula,data=data[,pca.vars],center=T,scale.=T,retx = T)
}
# ---- Keep the PCs explaining up to targetCumVar of total variance ----
targetCumVar = .8
pca.model$var = pca.model$sdev ^ 2 #eigenvalues
pca.model$pvar = pca.model$var / sum(pca.model$var) # proportion of variance per PC
pca.model$cumpvar = cumsum(pca.model$pvar ) # cumulative proportion
pca.model$pcaSel = pca.model$cumpvar<=targetCumVar # logical mask of PCs to keep
pca.model$pcaSelCount = sum(pca.model$pcaSel)
pca.model$pcaSelTotVar = sum(pca.model$pvar[pca.model$pcaSel])
message(pca.model$pcaSelCount, " PCAs justify ",percent(targetCumVar)," of the total Variance. (",percent(pca.model$pcaSelTotVar),")")
## 138 PCAs justify 80.0% of the total Variance. (79.9%)
plot(pca.model$var,xlab="Principal component", ylab="Proportion of variance explained", type='b')
plot(cumsum(pca.model$pvar ),xlab="Principal component", ylab="Cumulative Proportion of variance explained", ylim=c(0,1), type='b')
screeplot(pca.model,npcs = pca.model$pcaSelCount)
screeplot(pca.model,npcs = pca.model$pcaSelCount,type='lines')
#summary(pca.model)
#pca.model$rotation
#creating dataset
# Modelling frame: response column + scores of the selected PCs.
data.pca = dplyr::select(data,!!label.names) %>%
dplyr::bind_cols(dplyr::select(as.data.frame(pca.model$x)
,!!colnames(pca.model$rotation)[pca.model$pcaSel])
)
data.pca = data.pca[sample(nrow(data.pca)),] # randomly shuffle data
# 80/20 train/test split (caTools::sample.split).
split = sample.split(data.pca[,label.names], SplitRatio = 0.8)
data.train = subset(data.pca, split == TRUE)
data.test = subset(data.pca, split == FALSE)
plot.diagnostics <- function(model, train) {
  ## Plot a suite of lm diagnostics for `model` (fitted on `train`): the base
  ## plot(model) panels, studentized and standardized residual plots, a
  ## residual histogram, leverage and Cook's distance charts.
  ## Returns the (named) vector of Cook's distances.
  plot(model)  # base R panels: residuals vs fitted, QQ, scale-location, leverage
  r.standard = rstandard(model)  # internally studentized ("standardized") residuals
  r.student = rstudent(model)    # externally studentized residuals
  # Studentized residuals vs predicted values
  df = data.frame(x=predict(model,train),y=r.student)
  p=ggplot(data=df,aes(x=x,y=y)) +
  geom_point(color='blue',alpha=0.5,shape=20,size=2) +
  geom_hline(yintercept = 0,size=1)+
  ylab("Student Residuals") +
  xlab("Predicted Values")+
  ggtitle("Student Residual Plot")
  plot(p)
  # Standardized residuals vs predicted values, with +/-2 reference lines.
  # BUGFIX: this plot used rstandard() but was mislabelled "Student Residuals"/
  # "Student Residual Plot" (copy-paste from the plot above).
  df = data.frame(x=predict(model,train),y=r.standard)
  p=ggplot(data=df,aes(x=x,y=y)) +
  geom_point(color='blue',alpha=0.5,shape=20,size=2) +
  geom_hline(yintercept = c(-2,0,2),size=1)+
  ylab("Standardized Residuals") +
  xlab("Predicted Values")+
  ggtitle("Standardized Residual Plot")
  plot(p)
  # Histogram of studentized residuals against the standard normal density.
  df=data.frame(r.student)
  p=ggplot(data=df,aes(r.student)) +
  geom_histogram(aes(y=..density..),bins = 50,fill='blue',alpha=0.6) +
  stat_function(fun = dnorm, n = 100, args = list(mean = 0, sd = 1)) +
  ylab("Density")+
  xlab("Studentized Residuals")+
  ggtitle("Distribution of Studentized Residuals")
  plot(p)
  # http://www.stat.columbia.edu/~martin/W2024/R7.pdf
  # Influence measures: computed for interactive inspection only (summary is
  # too verbose to print by default).
  inf.meas = influence.measures(model)
  # print (summary(inf.meas)) # too much data
  # Leverage plot
  lev = hat(model.matrix(model))
  df=tibble::rownames_to_column(as.data.frame(lev),'id')
  p=ggplot(data=df,aes(x=as.numeric(id),y=lev)) +
  geom_point(color='blue',alpha=0.5,shape=20,size=2) +
  ylab('Leverage - check') +
  xlab('Index')
  plot(p)
  # Cook's Distance
  cd = cooks.distance(model)
  df=tibble::rownames_to_column(as.data.frame(cd),'id')
  # NOTE(review): only points above 15/n are labelled (presumably to limit
  # clutter) while the reference line and counts use the conventional 4/n
  # cutoff — confirm this asymmetry is intended.
  p=ggplot(data=df,aes(x=as.numeric(id),y=cd)) +
  geom_point(color='blue',alpha=0.5,shape=20,size=2) +
  geom_text(data=filter(df,cd>15/nrow(train)),aes(label=id),check_overlap=T,size=3,vjust=-.5)+
  ylab('Cooks distances') +
  geom_hline(yintercept = c(4/nrow(train),0),size=1)+
  xlab('Index')
  plot(p)
  print (paste("Number of data points that have Cook's D > 4/n: ", length(cd[cd > 4/nrow(train)]), sep = ""))
  print (paste("Number of data points that have Cook's D > 1: ", length(cd[cd > 1]), sep = ""))
  return(cd)
}
# Generate a reproducible seed list for caret::trainControl(seeds=...).
# Based on http://jaehyeon-kim.github.io/2015/05/Setup-Random-Seeds-on-Caret-Package.html
setCaretSeeds <- function(method = "cv", numbers = 1, repeats = 1, tunes = NULL, seed = 1701) {
  ## method  : resampling method ("cv" or "repeatedcv"); anything else -> NULL seeds
  ## numbers : number of folds
  ## repeats : number of repeats (used for "repeatedcv" only)
  ## tunes   : tuning-grid length added to each per-resample seed vector (NULL -> 0)
  ## seed    : master seed used to generate the list
  # B is the number of resamples; each one gets a vector of `numbers + tunes` seeds.
  B <- if (method == "cv") numbers
  else if(method == "repeatedcv") numbers * repeats
  else NULL
  # BUGFIX: the original tested `is.null(length)` — `length` is the base
  # function and is never NULL, so the NULL-seeds branch was unreachable and
  # any unsupported method crashed in vector(length = NULL). Test B instead.
  if(is.null(B)) {
    seeds <- NULL
  } else {
    set.seed(seed = seed)
    seeds <- vector(mode = "list", length = B)
    seeds <- lapply(seeds, function(x) sample.int(n = 1000000
                    , size = numbers + ifelse(is.null(tunes), 0, tunes)))
    # caret requires one extra, single seed for the final model fit.
    seeds[[length(seeds) + 1]] <- sample.int(n = 1000000, size = 1)
  }
  # return seeds
  seeds
}
train.caret.glmselect = function(formula, data, method
                    ,subopt = NULL, feature.names
                    , train.control = NULL, tune.grid = NULL, pre.proc = NULL){
  ## Train a caret model for one of the supported variable-selection methods
  ## and print its diagnostics (metrics plot, residual plot/histogram, coefs).
  ##
  ## formula       : model formula with the full set of candidate predictors
  ## data          : training data.frame
  ## method        : 'leapForward' | 'leapBackward' | 'leapSeq' | 'glmnet' | 'lars'
  ## subopt        : qualifies `method`; only 'LASSO' (with method='glmnet') is used
  ## feature.names : candidate predictor names (sizes the leap nvmax grid)
  ## train.control : optional caret::trainControl override
  ## tune.grid     : optional tuning-grid override
  ## pre.proc      : optional caret preProc spec
  ##
  ## Returns a list(model, id, residPlot, residHistogram, metricsPlot, ...);
  ## elements depend on `method` but are backward compatible with the previous
  ## version (residHistogram/metricsPlot are now present in every branch).
  if(is.null(train.control)){
    # Default: 10-fold CV with pre-generated seeds so parallel runs reproduce.
    train.control <- trainControl(method = "cv"
                                  ,number = 10
                                  ,seeds = setCaretSeeds(method = "cv"
                                                         , numbers = 10
                                                         , seed = 1701)
                                  ,search = "grid"
                                  ,verboseIter = TRUE
                                  ,allowParallel = TRUE
                                  )
  }
  if(is.null(tune.grid)){
    if (method == 'leapForward' | method == 'leapBackward' | method == 'leapSeq'){
      # Try every subset size from 1 up to the full predictor count.
      tune.grid = data.frame(nvmax = seq_along(feature.names))
    }
    # isTRUE() guards against subopt = NULL: `NULL == 'LASSO'` is logical(0),
    # which makes a bare `&&` fail.
    if (method == 'glmnet' && isTRUE(subopt == 'LASSO')){
      # Will only show 1 Lambda value during training, but that is OK
      # https://stackoverflow.com/questions/47526544/why-need-to-tune-lambda-with-carettrain-method-glmnet-and-cv-glmnet
      # Another option for LASSO is this: https://github.com/topepo/caret/blob/master/RegressionTests/Code/lasso.R
      lambda = 10^seq(-2,0, length =100)
      alpha = c(1)   # alpha = 1 => pure LASSO penalty
      tune.grid = expand.grid(alpha = alpha,lambda = lambda)
    }
    if (method == 'lars'){
      # https://github.com/topepo/caret/blob/master/RegressionTests/Code/lars.R
      fraction = seq(0, 1, length = 100)
      tune.grid = expand.grid(fraction = fraction)
      pre.proc = c("center", "scale")
    }
  }
  # http://sshaikh.org/2015/05/06/parallelize-machine-learning-in-r-with-multi-core-cpus/
  # BUGFIX: makeCluster() expects an integer worker count; detectCores()*0.75
  # is usually fractional and was silently truncated. Floor it explicitly and
  # guarantee at least one worker.
  cl <- makeCluster(max(1L, floor(detectCores()*0.75))) # use ~75% of cores, leave the rest for other tasks
  registerDoParallel(cl)
  # The seed must be set immediately before caret::train() for reproducibility.
  set.seed(1)
  model.caret <- caret::train(formula
                              , data = data
                              , method = method
                              , tuneGrid = tune.grid
                              , trControl = train.control
                              , preProc = pre.proc
                              )
  stopCluster(cl)
  registerDoSEQ() # fall back to the sequential engine after parallel training
  if (method == 'leapForward' | method == 'leapBackward' | method == 'leapSeq'){
    print("All models results")
    print(model.caret$results) # all model results
    print("Best Model")
    print(model.caret$bestTune) # best model
    model = model.caret$finalModel
    # Metrics Plot
    dataPlot = model.caret$results %>%
      gather(key='metric',value='value',-nvmax) %>%
      dplyr::filter(metric %in% c('MAE','RMSE','Rsquared'))
    metricsPlot = ggplot(data=dataPlot,aes(x=nvmax,y=value) ) +
      geom_line(color='lightblue4') +
      geom_point(color='blue',alpha=0.7,size=.9) +
      facet_wrap(~metric,ncol=2,scales='free_y')+
      theme_light()
    plot(metricsPlot)
    # Residuals Plot
    # leap function does not support studentized residuals
    dataPlot=data.frame(pred=predict(model.caret,data),res=resid(model.caret))
    residPlot = ggplot(dataPlot,aes(x=pred,y=res)) +
      geom_point(color='light blue',alpha=0.7) +
      geom_smooth(method="lm")+
      theme_light()
    plot(residPlot)
    # BUGFIX: theme_light() was on its own line with no '+', so it was a
    # silent no-op and the theme was never applied to the histogram.
    residHistogram = ggplot(dataPlot,aes(x=res)) +
      geom_histogram(aes(y=..density..),fill='light blue',alpha=1) +
      #geom_density(color='lightblue4') +
      stat_function(fun = dnorm, n = 100, args = list(mean = mean(dataPlot$res)
                    , sd = sd(dataPlot$res)),color='lightblue4') +
      theme_light()
    plot(residHistogram)
    id = rownames(model.caret$bestTune)
    # regsubsets doesn't return a full model object (see its documentation),
    # so refit an lm on the selected terms to obtain coefficient intervals.
    # https://stackoverflow.com/questions/13063762/how-to-obtain-a-lm-object-from-regsubsets
    print("Coefficients of final model:")
    coefs <- coef(model, id=id)
    nams <- names(coefs)
    nams <- nams[!nams %in% "(Intercept)"]
    response <- as.character(formula[[2]])
    form <- as.formula(paste(response, paste(nams, collapse = " + "), sep = " ~ "))
    mod <- lm(form, data = data)
    print(car::Confint(mod))
    return(list(model = model,id = id, residPlot = residPlot, residHistogram=residHistogram
                ,metricsPlot=metricsPlot # added for consistency with the other branches
                ,modelLM=mod))
  }
  if (method == 'glmnet' && isTRUE(subopt == 'LASSO')){
    print(model.caret)
    print(plot(model.caret))
    print(model.caret$bestTune)
    print(model.caret$results)
    model=model.caret$finalModel
    # Metrics Plot
    dataPlot = model.caret$results %>%
      gather(key='metric',value='value',-lambda) %>%
      dplyr::filter(metric %in% c('MAE','RMSE','Rsquared'))
    metricsPlot = ggplot(data=dataPlot,aes(x=lambda,y=value) ) +
      geom_line(color='lightblue4') +
      geom_point(color='blue',alpha=0.7,size=.9) +
      facet_wrap(~metric,ncol=2,scales='free_y')+
      theme_light()
    plot(metricsPlot)
    # Residuals Plot
    dataPlot=data.frame(pred=predict(model.caret,data),res=resid(model.caret))
    residPlot = ggplot(dataPlot,aes(x=pred,y=res)) +
      geom_point(color='light blue',alpha=0.7) +
      geom_smooth(method="lm")+
      theme_light()
    plot(residPlot)
    # BUGFIX: '+' added before theme_light() (was a silent no-op).
    residHistogram = ggplot(dataPlot,aes(x=res)) +
      geom_histogram(aes(y=..density..),fill='light blue',alpha=1) +
      #geom_density(color='lightblue4') +
      stat_function(fun = dnorm, n = 100, args = list(mean = mean(dataPlot$res)
                    , sd = sd(dataPlot$res)),color='lightblue4') +
      theme_light()
    plot(residHistogram)
    print("Coefficients")
    # no interval for glmnet: https://stackoverflow.com/questions/39750965/confidence-intervals-for-ridge-regression
    t=coef(model,s=model.caret$bestTune$lambda)
    model.coef = t[which(t[,1]!=0),]   # keep only non-zero (selected) coefficients
    print(as.data.frame(model.coef))
    id = NULL # not really needed but added for consistency
    return(list(model = model.caret,id = id, residPlot = residPlot, metricsPlot=metricsPlot
                ,residHistogram=residHistogram)) # added for consistency with the other branches
  }
  if (method == 'lars'){
    print(model.caret)
    print(plot(model.caret))
    print(model.caret$bestTune)
    # Metrics Plot
    dataPlot = model.caret$results %>%
      gather(key='metric',value='value',-fraction) %>%
      dplyr::filter(metric %in% c('MAE','RMSE','Rsquared'))
    metricsPlot = ggplot(data=dataPlot,aes(x=fraction,y=value) ) +
      geom_line(color='lightblue4') +
      geom_point(color='blue',alpha=0.7,size=.9) +
      facet_wrap(~metric,ncol=2,scales='free_y')+
      theme_light()
    plot(metricsPlot)
    # Residuals Plot
    dataPlot=data.frame(pred=predict(model.caret,data),res=resid(model.caret))
    residPlot = ggplot(dataPlot,aes(x=pred,y=res)) +
      geom_point(color='light blue',alpha=0.7) +
      geom_smooth(method="lm")+
      theme_light()
    plot(residPlot)
    # BUGFIX: '+' added before theme_light() (was a silent no-op).
    residHistogram = ggplot(dataPlot,aes(x=res)) +
      geom_histogram(aes(y=..density..),fill='light blue',alpha=1) +
      #geom_density(color='lightblue4') +
      stat_function(fun = dnorm, n = 100, args = list(mean = mean(dataPlot$res)
                    , sd = sd(dataPlot$res)),color='lightblue4') +
      theme_light()
    plot(residHistogram)
    print("Coefficients")
    t=coef(model.caret$finalModel,s=model.caret$bestTune$fraction,mode='fraction')
    model.coef = t[which(t!=0)]        # keep only non-zero (selected) coefficients
    print(model.coef)
    id = NULL # not really needed but added for consistency
    return(list(model = model.caret,id = id, residPlot = residPlot, residHistogram=residHistogram
                ,metricsPlot=metricsPlot)) # added for consistency with the other branches
  }
}
# https://stackoverflow.com/questions/48265743/linear-model-subset-selection-goodness-of-fit-with-k-fold-cross-validation
# Predict from a leaps::regsubsets fit. The formula is passed explicitly
# because object$call[[2]] only yields the symbol "formula", not its value.
predict.regsubsets <- function(object, newdata, id, formula, ...) {
  design <- model.matrix(formula, newdata)  # adds intercept, expands interactions
  beta <- coef(object, id = id)             # coefficients of the id-th subset model
  selected <- names(beta)
  design[, selected] %*% beta
}
test.model = function(model, test, level=0.95
           ,draw.limits = FALSE, good = 0.1, ok = 0.15
           ,method = NULL, subopt = NULL
           ,id = NULL, formula, feature.names, label.names
           ,transformation = NULL){
  ## Predict on a hold-out set, report the test MSE and plot actual vs
  ## predicted (with +/- `good` and +/- `ok` tolerance bands).
  ## if using caret for glm select equivalent functionality,
  ## need to pass formula (full is ok as it will select subset of variables from there)
  ## NOTE(review): reads the globals `log.pred` and `norm.pred` to decide how
  ## to back-transform predictions — confirm they are set before calling.
  ## `draw.limits` is currently unused (kept for interface compatibility).
  # BUGFIX: converted to an else-if chain. Originally every `if (method == ...)`
  # was evaluated even when method was NULL, and `NULL == '...'` is logical(0),
  # which crashes a bare `if ()` — so the method=NULL path never worked.
  if (is.null(method)){
    pred = predict(model, newdata=test, interval="confidence", level = level)
  } else if (method == 'leapForward' | method == 'leapBackward' | method == 'leapSeq'){
    pred = predict.regsubsets(model, newdata = test, id = id, formula = formula)
  } else if (method == 'glmnet' && isTRUE(subopt == 'LASSO')){
    xtest = as.matrix(test[,feature.names])
    pred=as.data.frame(predict(model, xtest))
  } else if (method == 'lars'){
    pred=as.data.frame(predict(model, newdata = test))
  } else {
    stop("test.model: unsupported method '", method, "'")
  }
  # Summary of predicted values
  print ("Summary of predicted values: ")
  print(summary(pred[,1]))
  test.mse = mean((test[,label.names]-pred[,1])^2)
  print (paste(method, subopt, "Test MSE:", test.mse, sep=" "))
  if(log.pred == TRUE || norm.pred == TRUE){
    # Comparison on the transformed scale first.
    # BUGFIX: wrapped in print() — a ggplot built inside an if-block is not
    # auto-printed, so this plot was silently discarded before.
    df=data.frame(x=test[,label.names],y=pred[,1])
    print(ggplot(df,aes(x=x,y=y)) +
      geom_point(color='blue',alpha=0.5,shape=20,size=2) +
      geom_abline(slope=1,intercept=0,color='black',size=1) +
      #scale_y_continuous(limits=c(min(df),max(df)))+
      xlab("Actual (Transformed)")+
      ylab("Predicted (Transformed)"))
  }
  # Back-transform to the original scale for the final comparison plot.
  if (log.pred == FALSE && norm.pred == FALSE){
    x = test[,label.names]
    y = pred[,1]
  }
  if (log.pred == TRUE){
    x = 10^test[,label.names]   # inverse of the log10 transform
    y = 10^pred[,1]
  }
  if (norm.pred == TRUE){
    x = predict(transformation, test[,label.names], inverse = TRUE)
    y = predict(transformation, pred[,1], inverse = TRUE)
  }
  df=data.frame(x,y)
  # Returned as the function value (auto-printed when called at top level).
  ggplot(df,aes(x,y)) +
    geom_point(color='blue',alpha=0.5,shape=20,size=2) +
    geom_abline(slope=c(1+good,1-good,1+ok,1-ok)
                ,intercept=rep(0,4),color=c('dark green','dark green','dark red','dark red'),size=1,alpha=0.8) +
    #scale_y_continuous(limits=c(min(df),max(df)))+
    xlab("Actual")+
    ylab("Predicted")
}
# ---- Model formulas ----
n <- names(data.train)
# Full model: response ~ all selected principal components.
formula <- as.formula(paste(paste(n[n %in% label.names], collapse = " + ")
," ~", paste(n[!n %in% label.names], collapse = " + ")))
# Intercept-only (grand mean) model, for comparison.
grand.mean.formula = as.formula(paste(paste(n[n %in% label.names], collapse = " + ")," ~ 1"))
print(formula)
## y3.log ~ PC1 + PC2 + PC3 + PC4 + PC5 + PC6 + PC7 + PC8 + PC9 +
## PC10 + PC11 + PC12 + PC13 + PC14 + PC15 + PC16 + PC17 + PC18 +
## PC19 + PC20 + PC21 + PC22 + PC23 + PC24 + PC25 + PC26 + PC27 +
## PC28 + PC29 + PC30 + PC31 + PC32 + PC33 + PC34 + PC35 + PC36 +
## PC37 + PC38 + PC39 + PC40 + PC41 + PC42 + PC43 + PC44 + PC45 +
## PC46 + PC47 + PC48 + PC49 + PC50 + PC51 + PC52 + PC53 + PC54 +
## PC55 + PC56 + PC57 + PC58 + PC59 + PC60 + PC61 + PC62 + PC63 +
## PC64 + PC65 + PC66 + PC67 + PC68 + PC69 + PC70 + PC71 + PC72 +
## PC73 + PC74 + PC75 + PC76 + PC77 + PC78 + PC79 + PC80 + PC81 +
## PC82 + PC83 + PC84 + PC85 + PC86 + PC87 + PC88 + PC89 + PC90 +
## PC91 + PC92 + PC93 + PC94 + PC95 + PC96 + PC97 + PC98 + PC99 +
## PC100 + PC101 + PC102 + PC103 + PC104 + PC105 + PC106 + PC107 +
## PC108 + PC109 + PC110 + PC111 + PC112 + PC113 + PC114 + PC115 +
## PC116 + PC117 + PC118 + PC119 + PC120 + PC121 + PC122 + PC123 +
## PC124 + PC125 + PC126 + PC127 + PC128 + PC129 + PC130 + PC131 +
## PC132 + PC133 + PC134 + PC135 + PC136 + PC137 + PC138
print(grand.mean.formula)
## y3.log ~ 1
# Update feature.names because we may have transformed some features
feature.names = n[!n %in% label.names]
# Baseline: full OLS fit on all selected PCs.
model.full = lm(formula , data.train)
summary(model.full)
##
## Call:
## lm(formula = formula, data = data.train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.090531 -0.022188 -0.005378 0.017039 0.193080
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.097e+00 4.297e-04 4879.440 < 2e-16 ***
## PC1 -9.765e-04 9.180e-05 -10.637 < 2e-16 ***
## PC2 1.204e-03 1.013e-04 11.890 < 2e-16 ***
## PC3 -2.780e-04 1.116e-04 -2.491 0.012781 *
## PC4 1.683e-04 1.127e-04 1.493 0.135525
## PC5 6.824e-04 1.147e-04 5.947 2.91e-09 ***
## PC6 7.099e-05 1.156e-04 0.614 0.539006
## PC7 -5.115e-04 1.173e-04 -4.362 1.31e-05 ***
## PC8 -2.066e-04 1.169e-04 -1.768 0.077126 .
## PC9 -2.320e-04 1.222e-04 -1.899 0.057653 .
## PC10 2.618e-04 1.241e-04 2.110 0.034880 *
## PC11 -1.364e-03 1.267e-04 -10.763 < 2e-16 ***
## PC12 3.532e-04 1.265e-04 2.792 0.005255 **
## PC13 4.169e-04 1.308e-04 3.188 0.001441 **
## PC14 1.499e-03 1.299e-04 11.536 < 2e-16 ***
## PC15 -3.569e-04 1.332e-04 -2.679 0.007406 **
## PC16 7.894e-04 1.362e-04 5.798 7.10e-09 ***
## PC17 -1.726e-04 1.406e-04 -1.228 0.219558
## PC18 4.057e-04 1.444e-04 2.809 0.004987 **
## PC19 -3.291e-04 1.485e-04 -2.216 0.026752 *
## PC20 9.082e-04 1.557e-04 5.833 5.77e-09 ***
## PC21 -1.022e-03 1.593e-04 -6.416 1.52e-10 ***
## PC22 4.011e-03 1.661e-04 24.151 < 2e-16 ***
## PC23 -5.266e-04 3.335e-04 -1.579 0.114371
## PC24 -1.453e-03 3.711e-04 -3.915 9.14e-05 ***
## PC25 9.771e-04 3.678e-04 2.656 0.007922 **
## PC26 -4.768e-04 3.705e-04 -1.287 0.198170
## PC27 -3.659e-04 3.725e-04 -0.982 0.325991
## PC28 7.895e-04 3.738e-04 2.112 0.034723 *
## PC29 -7.187e-04 3.739e-04 -1.922 0.054654 .
## PC30 6.490e-04 3.742e-04 1.735 0.082856 .
## PC31 -3.553e-04 3.760e-04 -0.945 0.344797
## PC32 1.333e-03 3.736e-04 3.567 0.000364 ***
## PC33 1.430e-04 3.742e-04 0.382 0.702302
## PC34 -4.754e-05 3.761e-04 -0.126 0.899408
## PC35 -6.553e-05 3.784e-04 -0.173 0.862527
## PC36 5.842e-04 3.785e-04 1.544 0.122763
## PC37 -3.842e-04 3.780e-04 -1.016 0.309441
## PC38 4.310e-04 3.826e-04 1.126 0.260035
## PC39 3.447e-04 3.839e-04 0.898 0.369208
## PC40 3.159e-04 3.805e-04 0.830 0.406437
## PC41 -6.995e-04 3.789e-04 -1.846 0.064913 .
## PC42 -5.578e-04 3.819e-04 -1.460 0.144214
## PC43 7.032e-04 3.842e-04 1.830 0.067271 .
## PC44 -5.113e-04 3.821e-04 -1.338 0.180863
## PC45 1.228e-03 3.828e-04 3.208 0.001346 **
## PC46 1.505e-03 3.846e-04 3.913 9.25e-05 ***
## PC47 5.523e-04 3.863e-04 1.430 0.152856
## PC48 5.154e-05 3.868e-04 0.133 0.894008
## PC49 1.915e-04 3.867e-04 0.495 0.620475
## PC50 -8.834e-04 3.867e-04 -2.285 0.022373 *
## PC51 5.447e-04 3.829e-04 1.422 0.154941
## PC52 4.093e-04 3.853e-04 1.062 0.288210
## PC53 -5.554e-04 3.898e-04 -1.425 0.154263
## PC54 -4.988e-05 3.909e-04 -0.128 0.898461
## PC55 1.690e-04 3.884e-04 0.435 0.663498
## PC56 1.699e-05 3.936e-04 0.043 0.965566
## PC57 4.239e-04 3.897e-04 1.088 0.276689
## PC58 -5.486e-04 3.915e-04 -1.401 0.161161
## PC59 1.273e-03 3.931e-04 3.239 0.001207 **
## PC60 -8.035e-04 3.945e-04 -2.037 0.041702 *
## PC61 -2.923e-04 3.980e-04 -0.734 0.462741
## PC62 -5.600e-04 3.928e-04 -1.426 0.154017
## PC63 6.699e-04 3.960e-04 1.692 0.090753 .
## PC64 1.974e-04 3.969e-04 0.497 0.619008
## PC65 8.933e-04 3.979e-04 2.245 0.024808 *
## PC66 1.768e-04 3.980e-04 0.444 0.656863
## PC67 4.047e-04 3.984e-04 1.016 0.309763
## PC68 8.856e-04 3.979e-04 2.226 0.026072 *
## PC69 -1.249e-03 3.988e-04 -3.132 0.001744 **
## PC70 8.471e-05 4.004e-04 0.212 0.832448
## PC71 -8.288e-04 3.997e-04 -2.073 0.038177 *
## PC72 8.328e-04 3.984e-04 2.090 0.036640 *
## PC73 5.934e-04 3.983e-04 1.490 0.136278
## PC74 -1.005e-04 4.041e-04 -0.249 0.803594
## PC75 2.561e-04 4.045e-04 0.633 0.526635
## PC76 1.605e-03 4.004e-04 4.008 6.22e-05 ***
## PC77 -3.733e-05 4.023e-04 -0.093 0.926070
## PC78 -3.865e-04 4.041e-04 -0.956 0.338911
## PC79 8.152e-04 4.023e-04 2.027 0.042759 *
## PC80 -3.806e-04 4.033e-04 -0.944 0.345438
## PC81 -5.816e-04 4.070e-04 -1.429 0.153099
## PC82 -1.121e-03 4.035e-04 -2.779 0.005469 **
## PC83 1.166e-03 4.040e-04 2.886 0.003922 **
## PC84 2.597e-05 4.045e-04 0.064 0.948808
## PC85 4.157e-04 4.065e-04 1.022 0.306608
## PC86 5.556e-04 4.107e-04 1.353 0.176125
## PC87 5.798e-04 4.082e-04 1.420 0.155570
## PC88 4.339e-04 4.089e-04 1.061 0.288674
## PC89 2.791e-04 4.059e-04 0.688 0.491629
## PC90 -2.377e-03 4.085e-04 -5.818 6.30e-09 ***
## PC91 4.025e-04 4.086e-04 0.985 0.324562
## PC92 2.796e-04 4.108e-04 0.681 0.496073
## PC93 -1.612e-04 4.119e-04 -0.391 0.695572
## PC94 5.326e-04 4.118e-04 1.293 0.195976
## PC95 2.456e-04 4.115e-04 0.597 0.550627
## PC96 -8.829e-04 4.138e-04 -2.134 0.032899 *
## PC97 -2.984e-04 4.133e-04 -0.722 0.470300
## PC98 5.901e-04 4.157e-04 1.420 0.155761
## PC99 -1.017e-05 4.139e-04 -0.025 0.980409
## PC100 -5.491e-05 4.174e-04 -0.132 0.895335
## PC101 -9.925e-04 4.134e-04 -2.401 0.016389 *
## PC102 1.445e-03 4.172e-04 3.464 0.000536 ***
## PC103 1.656e-04 4.177e-04 0.397 0.691745
## PC104 6.401e-04 4.169e-04 1.536 0.124718
## PC105 2.704e-04 4.187e-04 0.646 0.518401
## PC106 9.546e-04 4.192e-04 2.277 0.022802 *
## PC107 4.008e-04 4.181e-04 0.959 0.337768
## PC108 -1.211e-03 4.190e-04 -2.891 0.003861 **
## PC109 -3.250e-04 4.196e-04 -0.775 0.438615
## PC110 -4.030e-04 4.192e-04 -0.961 0.336434
## PC111 -8.087e-04 4.201e-04 -1.925 0.054267 .
## PC112 9.124e-04 4.205e-04 2.170 0.030038 *
## PC113 -9.447e-06 4.207e-04 -0.022 0.982085
## PC114 -3.020e-04 4.224e-04 -0.715 0.474698
## PC115 -6.572e-04 4.211e-04 -1.561 0.118609
## PC116 -6.015e-05 4.214e-04 -0.143 0.886486
## PC117 -1.682e-03 4.229e-04 -3.977 7.06e-05 ***
## PC118 -9.490e-04 4.240e-04 -2.238 0.025235 *
## PC119 -8.211e-04 4.273e-04 -1.922 0.054703 .
## PC120 -5.498e-04 4.255e-04 -1.292 0.196351
## PC121 4.036e-04 4.281e-04 0.943 0.345750
## PC122 -7.965e-05 4.310e-04 -0.185 0.853371
## PC123 -7.135e-04 4.282e-04 -1.666 0.095717 .
## PC124 -1.246e-04 4.270e-04 -0.292 0.770428
## PC125 -4.637e-05 4.282e-04 -0.108 0.913776
## PC126 1.035e-03 4.286e-04 2.415 0.015790 *
## PC127 1.069e-03 4.250e-04 2.515 0.011920 *
## PC128 -4.753e-04 4.318e-04 -1.101 0.271047
## PC129 -7.344e-04 4.301e-04 -1.708 0.087762 .
## PC130 -1.290e-04 4.319e-04 -0.299 0.765159
## PC131 1.157e-03 4.315e-04 2.681 0.007360 **
## PC132 1.509e-03 4.297e-04 3.513 0.000447 ***
## PC133 -4.910e-04 4.328e-04 -1.135 0.256580
## PC134 -2.839e-04 4.324e-04 -0.657 0.511472
## PC135 -1.602e-05 4.320e-04 -0.037 0.970424
## PC136 6.361e-04 4.353e-04 1.461 0.144009
## PC137 8.859e-05 4.332e-04 0.204 0.837979
## PC138 1.132e-03 4.376e-04 2.587 0.009709 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.03202 on 5445 degrees of freedom
## Multiple R-squared: 0.2367, Adjusted R-squared: 0.2173
## F-statistic: 12.23 on 138 and 5445 DF, p-value: < 2.2e-16
cd.full = plot.diagnostics(model=model.full, train=data.train)
## [1] "Number of data points that have Cook's D > 4/n: 256"
## [1] "Number of data points that have Cook's D > 1: 0"
# Rows whose Cook's distance exceeds the common 4/n rule-of-thumb cutoff are
# treated as high-leverage points.
high.cd = names(cd.full[cd.full > 4 / nrow(data.train)])
# Keep a copy of the training data with the high-leverage rows flagged (1/0).
# NOTE(review): `t` shadows base::t (transpose) and is later passed as
# test.model(transformation = t) -- confirm that is intentional.
t = data.train %>%
  rownames_to_column() %>%
  mutate(high.cd = ifelse(rowname %in% high.cd, 1, 0))
# write.csv(t, file = 'data_high_cd_flag.csv', row.names = FALSE)
# Cleaned training set: drop the flagged high-leverage rows.
data.train2 = data.train[!rownames(data.train) %in% high.cd, ]
# Refit the full model on the cleaned training set and inspect the fit.
model.full2 = lm(formula, data = data.train2)
summary(model.full2)
##
## Call:
## lm(formula = formula, data = data.train2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.06020 -0.01906 -0.00325 0.01668 0.07266
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.093e+00 3.585e-04 5837.593 < 2e-16 ***
## PC1 -9.218e-04 7.658e-05 -12.038 < 2e-16 ***
## PC2 1.285e-03 8.456e-05 15.192 < 2e-16 ***
## PC3 -3.777e-04 9.308e-05 -4.057 5.04e-05 ***
## PC4 1.297e-04 9.388e-05 1.382 0.167007
## PC5 5.773e-04 9.574e-05 6.030 1.75e-09 ***
## PC6 9.269e-05 9.637e-05 0.962 0.336182
## PC7 -5.744e-04 9.761e-05 -5.884 4.25e-09 ***
## PC8 -1.877e-04 9.820e-05 -1.911 0.056036 .
## PC9 -2.546e-04 1.016e-04 -2.507 0.012196 *
## PC10 7.232e-05 1.035e-04 0.699 0.484852
## PC11 -1.438e-03 1.054e-04 -13.649 < 2e-16 ***
## PC12 4.231e-04 1.057e-04 4.004 6.32e-05 ***
## PC13 3.137e-04 1.093e-04 2.871 0.004110 **
## PC14 1.639e-03 1.084e-04 15.117 < 2e-16 ***
## PC15 -3.042e-04 1.114e-04 -2.731 0.006333 **
## PC16 8.430e-04 1.136e-04 7.418 1.38e-13 ***
## PC17 -1.613e-04 1.173e-04 -1.375 0.169075
## PC18 4.671e-04 1.208e-04 3.868 0.000111 ***
## PC19 -3.364e-04 1.239e-04 -2.715 0.006647 **
## PC20 9.398e-04 1.301e-04 7.221 5.91e-13 ***
## PC21 -8.681e-04 1.325e-04 -6.551 6.27e-11 ***
## PC22 4.004e-03 1.383e-04 28.948 < 2e-16 ***
## PC23 -6.460e-04 2.793e-04 -2.313 0.020745 *
## PC24 -1.379e-03 3.097e-04 -4.452 8.68e-06 ***
## PC25 1.017e-03 3.063e-04 3.322 0.000901 ***
## PC26 -7.350e-04 3.092e-04 -2.377 0.017497 *
## PC27 -2.869e-04 3.108e-04 -0.923 0.355918
## PC28 7.403e-04 3.117e-04 2.375 0.017572 *
## PC29 -5.809e-04 3.121e-04 -1.861 0.062745 .
## PC30 6.229e-04 3.124e-04 1.994 0.046190 *
## PC31 7.194e-05 3.143e-04 0.229 0.818955
## PC32 1.127e-03 3.115e-04 3.619 0.000299 ***
## PC33 1.899e-04 3.118e-04 0.609 0.542483
## PC34 -3.772e-04 3.137e-04 -1.202 0.229333
## PC35 2.116e-05 3.155e-04 0.067 0.946539
## PC36 5.659e-04 3.160e-04 1.791 0.073350 .
## PC37 -5.222e-04 3.150e-04 -1.658 0.097405 .
## PC38 2.701e-04 3.195e-04 0.845 0.397889
## PC39 2.622e-04 3.213e-04 0.816 0.414477
## PC40 2.346e-04 3.176e-04 0.739 0.460029
## PC41 -6.475e-04 3.162e-04 -2.048 0.040620 *
## PC42 -6.866e-04 3.196e-04 -2.148 0.031733 *
## PC43 6.060e-04 3.203e-04 1.892 0.058569 .
## PC44 -3.149e-04 3.191e-04 -0.987 0.323855
## PC45 1.319e-03 3.205e-04 4.117 3.90e-05 ***
## PC46 1.261e-03 3.210e-04 3.930 8.62e-05 ***
## PC47 1.995e-04 3.224e-04 0.619 0.536179
## PC48 -2.922e-04 3.241e-04 -0.902 0.367273
## PC49 -1.730e-04 3.229e-04 -0.536 0.592140
## PC50 -3.697e-04 3.225e-04 -1.146 0.251685
## PC51 4.581e-04 3.193e-04 1.435 0.151429
## PC52 6.001e-04 3.215e-04 1.866 0.062062 .
## PC53 -3.911e-04 3.249e-04 -1.204 0.228655
## PC54 -7.146e-06 3.260e-04 -0.022 0.982513
## PC55 1.482e-04 3.251e-04 0.456 0.648594
## PC56 1.084e-04 3.280e-04 0.331 0.740971
## PC57 1.750e-04 3.253e-04 0.538 0.590750
## PC58 -6.214e-04 3.267e-04 -1.902 0.057184 .
## PC59 8.635e-04 3.263e-04 2.646 0.008168 **
## PC60 -5.853e-04 3.291e-04 -1.779 0.075351 .
## PC61 -3.302e-04 3.319e-04 -0.995 0.319833
## PC62 -4.346e-04 3.278e-04 -1.326 0.184983
## PC63 7.022e-04 3.302e-04 2.126 0.033532 *
## PC64 -5.134e-05 3.306e-04 -0.155 0.876608
## PC65 8.445e-04 3.320e-04 2.544 0.010991 *
## PC66 1.845e-04 3.321e-04 0.556 0.578545
## PC67 2.889e-04 3.335e-04 0.866 0.386424
## PC68 1.116e-03 3.327e-04 3.354 0.000802 ***
## PC69 -1.129e-03 3.332e-04 -3.389 0.000707 ***
## PC70 -3.221e-04 3.331e-04 -0.967 0.333569
## PC71 -1.070e-03 3.338e-04 -3.206 0.001352 **
## PC72 6.772e-04 3.327e-04 2.035 0.041866 *
## PC73 5.754e-04 3.320e-04 1.733 0.083079 .
## PC74 2.526e-04 3.375e-04 0.748 0.454258
## PC75 -6.085e-05 3.365e-04 -0.181 0.856514
## PC76 1.592e-03 3.336e-04 4.773 1.87e-06 ***
## PC77 2.632e-05 3.355e-04 0.078 0.937491
## PC78 -3.115e-04 3.359e-04 -0.927 0.353916
## PC79 7.268e-04 3.371e-04 2.156 0.031115 *
## PC80 -2.630e-04 3.369e-04 -0.781 0.435045
## PC81 -6.244e-04 3.388e-04 -1.843 0.065388 .
## PC82 -8.437e-04 3.379e-04 -2.497 0.012571 *
## PC83 1.320e-03 3.375e-04 3.910 9.34e-05 ***
## PC84 -2.965e-05 3.381e-04 -0.088 0.930121
## PC85 4.614e-04 3.402e-04 1.356 0.175074
## PC86 1.777e-04 3.435e-04 0.517 0.604940
## PC87 5.984e-04 3.409e-04 1.755 0.079282 .
## PC88 1.529e-04 3.402e-04 0.449 0.653181
## PC89 2.414e-05 3.398e-04 0.071 0.943356
## PC90 -2.226e-03 3.413e-04 -6.523 7.54e-11 ***
## PC91 5.740e-04 3.428e-04 1.675 0.094078 .
## PC92 2.810e-04 3.432e-04 0.819 0.412995
## PC93 1.128e-05 3.427e-04 0.033 0.973746
## PC94 5.706e-04 3.441e-04 1.658 0.097330 .
## PC95 3.536e-04 3.435e-04 1.029 0.303312
## PC96 -6.900e-04 3.446e-04 -2.002 0.045339 *
## PC97 -2.183e-04 3.460e-04 -0.631 0.528103
## PC98 4.201e-04 3.474e-04 1.209 0.226596
## PC99 -4.002e-05 3.460e-04 -0.116 0.907935
## PC100 -2.645e-04 3.483e-04 -0.759 0.447644
## PC101 -8.152e-04 3.457e-04 -2.358 0.018418 *
## PC102 1.576e-03 3.468e-04 4.543 5.66e-06 ***
## PC103 2.746e-06 3.489e-04 0.008 0.993721
## PC104 5.770e-04 3.478e-04 1.659 0.097209 .
## PC105 5.902e-06 3.488e-04 0.017 0.986501
## PC106 1.003e-03 3.504e-04 2.862 0.004221 **
## PC107 3.966e-04 3.496e-04 1.134 0.256668
## PC108 -7.518e-04 3.490e-04 -2.155 0.031245 *
## PC109 -2.466e-04 3.493e-04 -0.706 0.480375
## PC110 -2.181e-04 3.494e-04 -0.624 0.532562
## PC111 -7.398e-04 3.501e-04 -2.113 0.034628 *
## PC112 7.066e-04 3.506e-04 2.015 0.043916 *
## PC113 1.701e-04 3.517e-04 0.484 0.628745
## PC114 -2.693e-04 3.524e-04 -0.764 0.444826
## PC115 -7.366e-04 3.519e-04 -2.093 0.036396 *
## PC116 -9.007e-06 3.509e-04 -0.026 0.979522
## PC117 -1.580e-03 3.536e-04 -4.468 8.07e-06 ***
## PC118 -9.378e-04 3.552e-04 -2.640 0.008314 **
## PC119 -8.418e-04 3.570e-04 -2.358 0.018424 *
## PC120 -6.197e-04 3.549e-04 -1.746 0.080862 .
## PC121 5.632e-04 3.575e-04 1.575 0.115248
## PC122 -2.975e-04 3.603e-04 -0.826 0.408912
## PC123 -2.495e-04 3.571e-04 -0.699 0.484670
## PC124 -3.537e-04 3.567e-04 -0.992 0.321316
## PC125 2.699e-04 3.583e-04 0.753 0.451198
## PC126 8.272e-04 3.568e-04 2.318 0.020478 *
## PC127 7.749e-04 3.551e-04 2.182 0.029155 *
## PC128 -3.578e-04 3.611e-04 -0.991 0.321817
## PC129 -8.926e-04 3.592e-04 -2.485 0.012987 *
## PC130 -1.232e-05 3.598e-04 -0.034 0.972678
## PC131 9.827e-04 3.608e-04 2.724 0.006477 **
## PC132 1.560e-03 3.583e-04 4.355 1.36e-05 ***
## PC133 -8.612e-04 3.614e-04 -2.383 0.017211 *
## PC134 -2.713e-04 3.610e-04 -0.752 0.452368
## PC135 -9.305e-05 3.598e-04 -0.259 0.795909
## PC136 5.593e-04 3.640e-04 1.536 0.124519
## PC137 2.179e-04 3.615e-04 0.603 0.546731
## PC138 8.440e-04 3.647e-04 2.314 0.020689 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.02607 on 5189 degrees of freedom
## Multiple R-squared: 0.3188, Adjusted R-squared: 0.3007
## F-statistic: 17.6 on 138 and 5189 DF, p-value: < 2.2e-16
# Diagnostics for the refit model on the cleaned data. Named arguments for
# consistency with the earlier plot.diagnostics(model = ..., train = ...) call.
cd.full2 = plot.diagnostics(model = model.full2, train = data.train2)
## [1] "Number of data points that have Cook's D > 4/n: 251"
## [1] "Number of data points that have Cook's D > 1: 0"
# Much more normal residuals than before.
# Checking whether the high-leverage and normal distributions differ and, if so, which variables drive it
# High Leverage Plot: compare the target-variable distribution of the flagged
# high-leverage rows against the remaining ("Normal") rows.
plotData = data.train %>%
  rownames_to_column() %>%
  mutate(type = ifelse(rowname %in% high.cd, 'High', 'Normal')) %>%
  dplyr::select(type, target = one_of(label.names))
ggplot(data = plotData, aes(x = type, y = target)) +
  geom_boxplot(fill = 'light blue', outlier.shape = NA) +
  # `labels` spelled out in full (the original `label=` relied on partial
  # argument matching).
  scale_y_continuous(name = "Target Variable Values",
                     labels = scales::comma_format(accuracy = .1)) +
  theme_light() +
  ggtitle('Distribution of High Leverage Points and Normal Points')
# 2-sample t-tests: for each feature, test whether the high-leverage rows
# differ in mean from the normal rows.
plotData = data.train %>%
  rownames_to_column() %>%
  mutate(type = ifelse(rowname %in% high.cd, 'High', 'Normal')) %>%
  dplyr::select(type, one_of(feature.names))
comp.test = lapply(dplyr::select(plotData, one_of(feature.names))
                   , function(x) t.test(x ~ plotData$type, var.equal = TRUE))
# Keep only the features with a significant mean difference (p < 0.05).
sig.comp = list.filter(comp.test, p.value < 0.05)
# vapply() instead of sapply(): type-stable -- always a named numeric vector,
# even when sig.comp is empty (sapply would return list() there).
vapply(sig.comp, function(x) x[['p.value']], numeric(1))
## PC2 PC10 PC21 PC34 PC49 PC70 PC75 PC108
## 0.037931076 0.007022022 0.015442896 0.047745857 0.020470681 0.033985809 0.039226247 0.010123371
# Box plots for just the features whose means differ significantly.
mm = melt(plotData, id = c('type')) %>% filter(variable %in% names(sig.comp))
ggplot(mm, aes(x = type, y = value)) +
  geom_boxplot() +
  facet_wrap(~variable, ncol = 5, scales = 'free_y') +
  # `labels` spelled out in full (was `label=` via partial matching).
  scale_y_continuous(name = "values", labels = scales::comma_format(accuracy = .1)) +
  ggtitle('Distribution of High Leverage Points and Normal Points')
# Distribution (box) plots across all features, split by leverage type.
mm = melt(plotData, id = c('type'))
ggplot(mm, aes(x = type, y = value)) +
  geom_boxplot() +
  facet_wrap(~variable, ncol = 8, scales = 'free_y') +
  # `labels` spelled out in full (was `label=` via partial matching).
  scale_y_continuous(name = "values", labels = scales::comma_format(accuracy = .1)) +
  ggtitle('Distribution of High Leverage Points and Normal Points')
# Intercept-only (grand mean) baseline model for comparison against the
# PCA-feature models.
model.null = lm(grand.mean.formula, data = data.train)
summary(model.null)
##
## Call:
## lm(formula = grand.mean.formula, data = data.train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.11443 -0.02392 -0.00343 0.02090 0.19088
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.0963035 0.0004843 4329 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.03619 on 5583 degrees of freedom
References — Basic: http://www.stat.columbia.edu/~martin/W2024/R10.pdf ; Cross-validation and other metrics: http://www.sthda.com/english/articles/37-model-selection-essentials-in-r/154-stepwise-regression-essentials-in-r/
# Forward stepwise selection via caret (leapForward), if enabled by params.
if (algo.forward.caret) {
  set.seed(1)  # reproducible CV folds
  returned = train.caret.glmselect(
    formula = formula,
    data = data.train,
    method = "leapForward",
    feature.names = feature.names
  )
  model.forward = returned$model
  id = returned$id
}
## Aggregating results
## Selecting tuning parameters
## Fitting nvmax = 110 on full training set
## [1] "All models results"
## nvmax RMSE Rsquared MAE RMSESD RsquaredSD MAESD
## 1 1 0.03467963 0.08228409 0.02689668 0.0005837199 0.01755905 0.0002999811
## 2 2 0.03442479 0.09584815 0.02672288 0.0006696668 0.01801778 0.0003448861
## 3 3 0.03415987 0.11016571 0.02652062 0.0006149829 0.02064252 0.0004941685
## 4 4 0.03374386 0.13134494 0.02610610 0.0006481791 0.01901965 0.0005078870
## 5 5 0.03335768 0.15117284 0.02583748 0.0006824675 0.02092203 0.0005513551
## 6 6 0.03333923 0.15191631 0.02584422 0.0006931797 0.01936718 0.0005251475
## 7 7 0.03327598 0.15511291 0.02581860 0.0006641517 0.01899973 0.0005259697
## 8 8 0.03317918 0.15995957 0.02575309 0.0006320762 0.01732750 0.0005018923
## 9 9 0.03307505 0.16508344 0.02567850 0.0006160087 0.01698117 0.0004669829
## 10 10 0.03289696 0.17392287 0.02551290 0.0005889235 0.01709557 0.0004697173
## 11 11 0.03291761 0.17299007 0.02548476 0.0006006470 0.01761570 0.0004844542
## 12 12 0.03291500 0.17309136 0.02547065 0.0006001467 0.01673993 0.0004830773
## 13 13 0.03289015 0.17434453 0.02546783 0.0006426586 0.01637780 0.0004957932
## 14 14 0.03289115 0.17437202 0.02545558 0.0007018654 0.01725684 0.0005217945
## 15 15 0.03284130 0.17692317 0.02541656 0.0006895316 0.01819374 0.0005112121
## 16 16 0.03285292 0.17633756 0.02542029 0.0006849775 0.01755231 0.0004742705
## 17 17 0.03288050 0.17502441 0.02542037 0.0007001761 0.01707427 0.0005108211
## 18 18 0.03284653 0.17673743 0.02538417 0.0006608252 0.01623923 0.0004841676
## 19 19 0.03281035 0.17853299 0.02536574 0.0006804638 0.01691988 0.0004861180
## 20 20 0.03282852 0.17774842 0.02536737 0.0006985725 0.01802704 0.0005441927
## 21 21 0.03282387 0.17807758 0.02536963 0.0007020519 0.01899034 0.0005247229
## 22 22 0.03281721 0.17835409 0.02535781 0.0007478792 0.01765618 0.0005565969
## 23 23 0.03279416 0.17950156 0.02533902 0.0007710354 0.01681905 0.0005626326
## 24 24 0.03275086 0.18160721 0.02530313 0.0007801356 0.01748304 0.0005713978
## 25 25 0.03274391 0.18207257 0.02528357 0.0007599734 0.01765686 0.0005473072
## 26 26 0.03275078 0.18181347 0.02528047 0.0007808991 0.01768458 0.0005607028
## 27 27 0.03275617 0.18160642 0.02531297 0.0007525466 0.01736412 0.0005702312
## 28 28 0.03274722 0.18212918 0.02531023 0.0007550846 0.01780786 0.0005603116
## 29 29 0.03274248 0.18242858 0.02530561 0.0007520921 0.01870840 0.0005649725
## 30 30 0.03276122 0.18170121 0.02531808 0.0007626415 0.02145986 0.0005924937
## 31 31 0.03276700 0.18149081 0.02530708 0.0007718902 0.02159776 0.0006159969
## 32 32 0.03275929 0.18192517 0.02529951 0.0007547615 0.02066122 0.0005950083
## 33 33 0.03275642 0.18208743 0.02529335 0.0007726665 0.02072365 0.0006097886
## 34 34 0.03276562 0.18172571 0.02528562 0.0007884826 0.01989804 0.0006231518
## 35 35 0.03275643 0.18219436 0.02527723 0.0007783770 0.01955388 0.0006217077
## 36 36 0.03275107 0.18255496 0.02528243 0.0007792909 0.01990465 0.0006096184
## 37 37 0.03273280 0.18344212 0.02527515 0.0007851491 0.02020206 0.0006189663
## 38 38 0.03272513 0.18377459 0.02528503 0.0007688132 0.01902732 0.0006127174
## 39 39 0.03270911 0.18462279 0.02527873 0.0007776991 0.01969676 0.0006010137
## 40 40 0.03274881 0.18275508 0.02531486 0.0007545026 0.01847049 0.0005809108
## 41 41 0.03274136 0.18319268 0.02531524 0.0007721779 0.01872370 0.0006023744
## 42 42 0.03274341 0.18318399 0.02532494 0.0007794003 0.01915266 0.0005981676
## 43 43 0.03272982 0.18384108 0.02531891 0.0007843113 0.02002845 0.0006119334
## 44 44 0.03272383 0.18412692 0.02532393 0.0007867139 0.02025409 0.0006176795
## 45 45 0.03270472 0.18506035 0.02529846 0.0008070006 0.02058011 0.0006212793
## 46 46 0.03269735 0.18543245 0.02528858 0.0007783044 0.01943826 0.0006055289
## 47 47 0.03268474 0.18616665 0.02528938 0.0007917638 0.02134224 0.0006278191
## 48 48 0.03265688 0.18752855 0.02527067 0.0007896322 0.02207858 0.0006340255
## 49 49 0.03265344 0.18772106 0.02525446 0.0007813617 0.02193511 0.0006306541
## 50 50 0.03264060 0.18839356 0.02523165 0.0007581522 0.02210280 0.0006171721
## 51 51 0.03264318 0.18831336 0.02523487 0.0007545332 0.02231320 0.0006054414
## 52 52 0.03263129 0.18889752 0.02522516 0.0007429341 0.02244839 0.0005995996
## 53 53 0.03261722 0.18954526 0.02521700 0.0007572466 0.02261241 0.0006094490
## 54 54 0.03260406 0.19021104 0.02520602 0.0007752339 0.02304311 0.0006273992
## 55 55 0.03261238 0.18987677 0.02521472 0.0007864547 0.02306141 0.0006407488
## 56 56 0.03261052 0.18999016 0.02521135 0.0007972300 0.02364582 0.0006578132
## 57 57 0.03260327 0.19031032 0.02519272 0.0007817686 0.02296486 0.0006358896
## 58 58 0.03261089 0.19006507 0.02519869 0.0007659209 0.02331494 0.0006246004
## 59 59 0.03261517 0.18991092 0.02518804 0.0007686961 0.02343763 0.0006400251
## 60 60 0.03260560 0.19031986 0.02518312 0.0007717041 0.02280514 0.0006269044
## 61 61 0.03258503 0.19129361 0.02516316 0.0007852949 0.02343075 0.0006420171
## 62 62 0.03258588 0.19123250 0.02516381 0.0007926924 0.02284466 0.0006436292
## 63 63 0.03257111 0.19189832 0.02515069 0.0007762863 0.02199510 0.0006330723
## 64 64 0.03256100 0.19245600 0.02514646 0.0007865942 0.02317236 0.0006374292
## 65 65 0.03254998 0.19293934 0.02515268 0.0007919837 0.02292395 0.0006465925
## 66 66 0.03255025 0.19299159 0.02516333 0.0007832775 0.02289653 0.0006329284
## 67 67 0.03254411 0.19331765 0.02515629 0.0007652304 0.02262896 0.0006097431
## 68 68 0.03253289 0.19387044 0.02514096 0.0007666892 0.02294790 0.0006183627
## 69 69 0.03252479 0.19424156 0.02512988 0.0007714158 0.02294470 0.0006242369
## 70 70 0.03251791 0.19454782 0.02513497 0.0007726349 0.02344826 0.0006247216
## 71 71 0.03252288 0.19435740 0.02513742 0.0007817703 0.02363442 0.0006255846
## 72 72 0.03251670 0.19465653 0.02512613 0.0007858251 0.02365387 0.0006206417
## 73 73 0.03251126 0.19493123 0.02513325 0.0007815723 0.02366288 0.0006158133
## 74 74 0.03251022 0.19502016 0.02513308 0.0007688479 0.02348401 0.0006101862
## 75 75 0.03250184 0.19545061 0.02513201 0.0007715226 0.02360648 0.0006145732
## 76 76 0.03250992 0.19510979 0.02513718 0.0007715749 0.02407926 0.0006198025
## 77 77 0.03252202 0.19461914 0.02515128 0.0007820404 0.02464484 0.0006310097
## 78 78 0.03251902 0.19473902 0.02514683 0.0007882468 0.02381499 0.0006352023
## 79 79 0.03249980 0.19559464 0.02513026 0.0007885672 0.02369816 0.0006374617
## 80 80 0.03249390 0.19584469 0.02513062 0.0007818181 0.02332497 0.0006444959
## 81 81 0.03249569 0.19577386 0.02513205 0.0007799144 0.02309096 0.0006431834
## 82 82 0.03249353 0.19591951 0.02513603 0.0007897934 0.02377980 0.0006555678
## 83 83 0.03249619 0.19582865 0.02513634 0.0008021332 0.02385513 0.0006589456
## 84 84 0.03249234 0.19601940 0.02513694 0.0007958498 0.02368722 0.0006561300
## 85 85 0.03249058 0.19612063 0.02514363 0.0007924024 0.02385745 0.0006541214
## 86 86 0.03248392 0.19642923 0.02513816 0.0007842825 0.02364808 0.0006464797
## 87 87 0.03247319 0.19693759 0.02513056 0.0007750023 0.02379821 0.0006407801
## 88 88 0.03246894 0.19713148 0.02512504 0.0007738594 0.02377237 0.0006456344
## 89 89 0.03247160 0.19705435 0.02512687 0.0007722282 0.02375451 0.0006438975
## 90 90 0.03247634 0.19684827 0.02512544 0.0007744665 0.02377566 0.0006465266
## 91 91 0.03247165 0.19708338 0.02512616 0.0007754172 0.02394506 0.0006477233
## 92 92 0.03246893 0.19719213 0.02512729 0.0007647697 0.02394705 0.0006436013
## 93 93 0.03246395 0.19745474 0.02512683 0.0007698467 0.02471194 0.0006544658
## 94 94 0.03246343 0.19750829 0.02512251 0.0007703953 0.02474501 0.0006517236
## 95 95 0.03245853 0.19775077 0.02511543 0.0007698506 0.02489160 0.0006472834
## 96 96 0.03244968 0.19814110 0.02510522 0.0007729884 0.02491682 0.0006505106
## 97 97 0.03244112 0.19852623 0.02509347 0.0007731527 0.02508986 0.0006539841
## 98 98 0.03244202 0.19851605 0.02509713 0.0007766195 0.02541039 0.0006599264
## 99 99 0.03244029 0.19856494 0.02509394 0.0007763590 0.02517093 0.0006594982
## 100 100 0.03242578 0.19921787 0.02508321 0.0007759773 0.02504508 0.0006616697
## 101 101 0.03241853 0.19954239 0.02507774 0.0007758078 0.02518189 0.0006631844
## 102 102 0.03241591 0.19965453 0.02507708 0.0007729732 0.02517536 0.0006635895
## 103 103 0.03240499 0.20015232 0.02507482 0.0007739621 0.02525908 0.0006666102
## 104 104 0.03239982 0.20040189 0.02506789 0.0007649861 0.02520672 0.0006598198
## 105 105 0.03240117 0.20034924 0.02507373 0.0007630631 0.02543303 0.0006598189
## 106 106 0.03239840 0.20048181 0.02507193 0.0007612769 0.02531828 0.0006547925
## 107 107 0.03240315 0.20024726 0.02507623 0.0007630758 0.02517725 0.0006575062
## 108 108 0.03240435 0.20020089 0.02507582 0.0007635573 0.02502667 0.0006602222
## 109 109 0.03239873 0.20048456 0.02507125 0.0007654171 0.02519034 0.0006609220
## 110 110 0.03239459 0.20065839 0.02506665 0.0007677123 0.02476335 0.0006611028
## 111 111 0.03239716 0.20053929 0.02507084 0.0007686129 0.02477107 0.0006616613
## 112 112 0.03239710 0.20054014 0.02506915 0.0007726370 0.02482303 0.0006640473
## 113 113 0.03239941 0.20044980 0.02507223 0.0007748712 0.02513380 0.0006668409
## 114 114 0.03240234 0.20031797 0.02507529 0.0007735815 0.02505888 0.0006645252
## 115 115 0.03239845 0.20049766 0.02507418 0.0007672455 0.02488664 0.0006612124
## 116 116 0.03239582 0.20061452 0.02506970 0.0007696846 0.02493590 0.0006630723
## 117 117 0.03239498 0.20065914 0.02506741 0.0007719129 0.02510054 0.0006637116
## 118 118 0.03239517 0.20064767 0.02506808 0.0007733197 0.02507145 0.0006639964
## 119 119 0.03239868 0.20049399 0.02507132 0.0007750974 0.02505414 0.0006636076
## 120 120 0.03240049 0.20040514 0.02507188 0.0007687080 0.02492257 0.0006569604
## 121 121 0.03239859 0.20049541 0.02507214 0.0007692483 0.02497651 0.0006584755
## 122 122 0.03239859 0.20049254 0.02507250 0.0007697507 0.02495424 0.0006582021
## 123 123 0.03240069 0.20040237 0.02507444 0.0007664690 0.02492546 0.0006548281
## 124 124 0.03240037 0.20042131 0.02507205 0.0007687254 0.02502678 0.0006554686
## 125 125 0.03240377 0.20026253 0.02507517 0.0007683187 0.02501472 0.0006547100
## 126 126 0.03240443 0.20023559 0.02507431 0.0007652942 0.02497196 0.0006546665
## 127 127 0.03240335 0.20028453 0.02507365 0.0007657492 0.02497180 0.0006549634
## 128 128 0.03240499 0.20021101 0.02507490 0.0007648377 0.02495368 0.0006537676
## 129 129 0.03240521 0.20019865 0.02507561 0.0007641579 0.02499913 0.0006546046
## 130 130 0.03240618 0.20015443 0.02507559 0.0007641984 0.02501429 0.0006540456
## 131 131 0.03240604 0.20015733 0.02507552 0.0007640321 0.02503097 0.0006538033
## 132 132 0.03240666 0.20012907 0.02507585 0.0007640936 0.02504140 0.0006532104
## 133 133 0.03240726 0.20009977 0.02507666 0.0007639656 0.02504310 0.0006536631
## 134 134 0.03240786 0.20007536 0.02507707 0.0007641164 0.02504665 0.0006529795
## 135 135 0.03240758 0.20008570 0.02507672 0.0007642833 0.02504061 0.0006528860
## 136 136 0.03240816 0.20006010 0.02507680 0.0007643459 0.02505548 0.0006530196
## 137 137 0.03240795 0.20006894 0.02507642 0.0007643151 0.02506310 0.0006529651
## 138 138 0.03240803 0.20006564 0.02507649 0.0007645271 0.02506156 0.0006530139
## [1] "Best Model"
## nvmax
## 110 110
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## [1] "Coefficients of final model:"
## Estimate 2.5 % 97.5 %
## (Intercept) 2.0965368703 2.095697e+00 2.097377e+00
## PC1 -0.0009753762 -1.154814e-03 -7.959383e-04
## PC2 0.0012035314 1.005546e-03 1.401517e-03
## PC3 -0.0002767035 -4.948937e-04 -5.851338e-05
## PC4 0.0001683756 -5.192600e-05 3.886772e-04
## PC5 0.0006826866 4.583826e-04 9.069907e-04
## PC7 -0.0005099621 -7.392059e-04 -2.807183e-04
## PC8 -0.0002067595 -4.352876e-04 2.176856e-05
## PC9 -0.0002314497 -4.702156e-04 7.316330e-06
## PC10 0.0002611245 1.863521e-05 5.036139e-04
## PC11 -0.0013627613 -1.610508e-03 -1.115014e-03
## PC12 0.0003546051 1.072602e-04 6.019500e-04
## PC13 0.0004167756 1.611092e-04 6.724419e-04
## PC14 0.0015001118 1.246140e-03 1.754083e-03
## PC15 -0.0003584491 -6.189019e-04 -9.799628e-05
## PC16 0.0007904896 5.243205e-04 1.056659e-03
## PC17 -0.0001736328 -4.484116e-04 1.011461e-04
## PC18 0.0004064056 1.241334e-04 6.886777e-04
## PC19 -0.0003290534 -6.193746e-04 -3.873210e-05
## PC20 0.0009086458 6.043066e-04 1.212985e-03
## PC21 -0.0010243370 -1.335834e-03 -7.128398e-04
## PC22 0.0040105844 3.685930e-03 4.335238e-03
## PC23 -0.0005271298 -1.179198e-03 1.249380e-04
## PC24 -0.0014519611 -2.177307e-03 -7.266149e-04
## PC25 0.0009727363 2.537141e-04 1.691759e-03
## PC26 -0.0004758081 -1.200151e-03 2.485353e-04
## PC27 -0.0003703819 -1.098387e-03 3.576233e-04
## PC28 0.0007845835 5.403101e-05 1.515136e-03
## PC29 -0.0007201114 -1.451054e-03 1.083162e-05
## PC30 0.0006503129 -8.115961e-05 1.381786e-03
## PC31 -0.0003568879 -1.091877e-03 3.781014e-04
## PC32 0.0013278584 5.974717e-04 2.058245e-03
## PC36 0.0005845222 -1.552992e-04 1.324344e-03
## PC37 -0.0003920529 -1.130908e-03 3.468022e-04
## PC38 0.0004271293 -3.206515e-04 1.174910e-03
## PC39 0.0003401916 -4.101892e-04 1.090572e-03
## PC40 0.0003206294 -4.231597e-04 1.064418e-03
## PC41 -0.0007005179 -1.441132e-03 4.009582e-05
## PC42 -0.0005636011 -1.309979e-03 1.827765e-04
## PC43 0.0007001334 -5.084190e-05 1.451109e-03
## PC44 -0.0005123994 -1.259461e-03 2.346622e-04
## PC45 0.0012269099 4.783201e-04 1.975500e-03
## PC46 0.0015047325 7.527743e-04 2.256691e-03
## PC47 0.0005506639 -2.045439e-04 1.305872e-03
## PC50 -0.0008826750 -1.638728e-03 -1.266224e-04
## PC51 0.0005462567 -2.024028e-04 1.294916e-03
## PC52 0.0004098906 -3.434558e-04 1.163237e-03
## PC53 -0.0005576225 -1.319556e-03 2.043114e-04
## PC57 0.0004296186 -3.321397e-04 1.191377e-03
## PC58 -0.0005527647 -1.318095e-03 2.125653e-04
## PC59 0.0012702222 5.016661e-04 2.038778e-03
## PC60 -0.0008017669 -1.572844e-03 -3.068976e-05
## PC61 -0.0002846938 -1.062817e-03 4.934299e-04
## PC62 -0.0005640088 -1.331659e-03 2.036412e-04
## PC63 0.0006734582 -1.007575e-04 1.447674e-03
## PC65 0.0008926788 1.148954e-04 1.670462e-03
## PC67 0.0004050070 -3.737709e-04 1.183785e-03
## PC68 0.0008858627 1.080092e-04 1.663716e-03
## PC69 -0.0012529641 -2.032539e-03 -4.733894e-04
## PC71 -0.0008268591 -1.608376e-03 -4.534231e-05
## PC72 0.0008356291 5.699689e-05 1.614261e-03
## PC73 0.0005921479 -1.864336e-04 1.370729e-03
## PC75 0.0002538745 -5.370266e-04 1.044776e-03
## PC76 0.0016080450 8.253431e-04 2.390747e-03
## PC78 -0.0003849174 -1.174979e-03 4.051447e-04
## PC79 0.0008139782 2.750905e-05 1.600447e-03
## PC80 -0.0003822184 -1.170784e-03 4.063477e-04
## PC81 -0.0005777761 -1.373533e-03 2.179804e-04
## PC82 -0.0011160961 -1.905057e-03 -3.271354e-04
## PC83 0.0011666680 3.769274e-04 1.956409e-03
## PC85 0.0004159382 -3.784671e-04 1.210344e-03
## PC86 0.0005542520 -2.483770e-04 1.356881e-03
## PC87 0.0005841709 -2.138372e-04 1.382179e-03
## PC88 0.0004275564 -3.716779e-04 1.226791e-03
## PC89 0.0002772053 -5.162544e-04 1.070665e-03
## PC90 -0.0023723844 -3.170924e-03 -1.573844e-03
## PC91 0.0003996476 -3.990746e-04 1.198370e-03
## PC92 0.0002769611 -5.261904e-04 1.080113e-03
## PC94 0.0005395375 -2.654395e-04 1.344514e-03
## PC96 -0.0008826530 -1.691488e-03 -7.381836e-05
## PC97 -0.0003003734 -1.108344e-03 5.075967e-04
## PC98 0.0005875189 -2.247114e-04 1.399749e-03
## PC101 -0.0009914517 -1.799574e-03 -1.833293e-04
## PC102 0.0014484484 6.327960e-04 2.264101e-03
## PC104 0.0006386730 -1.761698e-04 1.453516e-03
## PC105 0.0002752159 -5.431010e-04 1.093533e-03
## PC106 0.0009514398 1.319122e-04 1.770967e-03
## PC107 0.0003922184 -4.250027e-04 1.209439e-03
## PC108 -0.0012110732 -2.030131e-03 -3.920157e-04
## PC109 -0.0003256071 -1.145878e-03 4.946643e-04
## PC110 -0.0004121828 -1.231737e-03 4.073715e-04
## PC111 -0.0008118751 -1.633126e-03 9.376251e-06
## PC112 0.0009103789 8.847136e-05 1.732287e-03
## PC114 -0.0003074862 -1.133391e-03 5.184185e-04
## PC115 -0.0006585795 -1.481574e-03 1.644146e-04
## PC117 -0.0016856941 -2.512477e-03 -8.589110e-04
## PC118 -0.0009465493 -1.775062e-03 -1.180364e-04
## PC119 -0.0008246670 -1.659911e-03 1.057664e-05
## PC120 -0.0005478879 -1.379798e-03 2.840228e-04
## PC121 0.0004080198 -4.288019e-04 1.244842e-03
## PC123 -0.0007145140 -1.551468e-03 1.224401e-04
## PC126 0.0010369835 1.990034e-04 1.874964e-03
## PC127 0.0010668332 2.360861e-04 1.897580e-03
## PC128 -0.0004767245 -1.320809e-03 3.673603e-04
## PC129 -0.0007336509 -1.574362e-03 1.070599e-04
## PC131 0.0011602279 3.167657e-04 2.003690e-03
## PC132 0.0015102884 6.702975e-04 2.350279e-03
## PC133 -0.0004892188 -1.335368e-03 3.569302e-04
## PC134 -0.0002865181 -1.131822e-03 5.587861e-04
## PC136 0.0006394666 -2.115833e-04 1.490516e-03
## PC138 0.0011348902 2.795909e-04 1.990190e-03
# Evaluate the selected forward-stepwise model on the hold-out test set.
# NOTE(review): `transformation = t` -- at this point `t` is the flagged
# data frame built in the Cook's-distance step (it shadows base::t, the
# transpose function); confirm test.model() really expects that object here.
if (algo.forward.caret == TRUE){
test.model(model=model.forward, test=data.test
,method = 'leapForward',subopt = NULL
,formula = formula, feature.names = feature.names, label.names = label.names
,id = id
,draw.limits = TRUE, transformation = t)
}
## [1] "Summary of predicted values: "
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.033 2.086 2.098 2.097 2.110 2.159
## [1] "leapForward Test MSE: 0.00103301010705246"
# Backward stepwise selection via caret (leapBackward), if enabled by params.
if (algo.backward.caret) {
  set.seed(1)  # reproducible CV folds
  returned = train.caret.glmselect(
    formula = formula,
    data = data.train,
    method = "leapBackward",
    feature.names = feature.names
  )
  model.backward = returned$model
  id = returned$id
}
## Aggregating results
## Selecting tuning parameters
## Fitting nvmax = 110 on full training set
## [1] "All models results"
## nvmax RMSE Rsquared MAE RMSESD RsquaredSD MAESD
## 1 1 0.03467963 0.08228409 0.02689668 0.0005837199 0.01755905 0.0002999811
## 2 2 0.03442479 0.09584815 0.02672288 0.0006696668 0.01801778 0.0003448861
## 3 3 0.03415987 0.11016571 0.02652062 0.0006149829 0.02064252 0.0004941685
## 4 4 0.03374386 0.13134494 0.02610610 0.0006481791 0.01901965 0.0005078870
## 5 5 0.03335768 0.15117284 0.02583748 0.0006824675 0.02092203 0.0005513551
## 6 6 0.03333923 0.15191631 0.02584422 0.0006931797 0.01936718 0.0005251475
## 7 7 0.03327598 0.15511291 0.02581860 0.0006641517 0.01899973 0.0005259697
## 8 8 0.03317918 0.15995957 0.02575309 0.0006320762 0.01732750 0.0005018923
## 9 9 0.03307505 0.16508344 0.02567850 0.0006160087 0.01698117 0.0004669829
## 10 10 0.03289696 0.17392287 0.02551290 0.0005889235 0.01709557 0.0004697173
## 11 11 0.03291761 0.17299007 0.02548476 0.0006006470 0.01761570 0.0004844542
## 12 12 0.03291500 0.17309136 0.02547065 0.0006001467 0.01673993 0.0004830773
## 13 13 0.03289015 0.17434453 0.02546783 0.0006426586 0.01637780 0.0004957932
## 14 14 0.03289115 0.17437202 0.02545558 0.0007018654 0.01725684 0.0005217945
## 15 15 0.03284130 0.17692317 0.02541656 0.0006895316 0.01819374 0.0005112121
## 16 16 0.03285292 0.17633756 0.02542029 0.0006849775 0.01755231 0.0004742705
## 17 17 0.03288050 0.17502441 0.02542037 0.0007001761 0.01707427 0.0005108211
## 18 18 0.03284653 0.17673743 0.02538417 0.0006608252 0.01623923 0.0004841676
## 19 19 0.03281035 0.17853299 0.02536574 0.0006804638 0.01691988 0.0004861180
## 20 20 0.03282852 0.17774842 0.02536737 0.0006985725 0.01802704 0.0005441927
## 21 21 0.03282387 0.17807758 0.02536963 0.0007020519 0.01899034 0.0005247229
## 22 22 0.03281721 0.17835409 0.02535781 0.0007478792 0.01765618 0.0005565969
## 23 23 0.03281606 0.17850053 0.02536217 0.0007643369 0.01726868 0.0005585367
## 24 24 0.03277458 0.18057033 0.02532659 0.0007737236 0.01810310 0.0005698552
## 25 25 0.03275424 0.18161604 0.02529412 0.0007729785 0.01852917 0.0005599684
## 26 26 0.03274555 0.18205508 0.02527989 0.0007743612 0.01720570 0.0005599813
## 27 27 0.03275617 0.18160642 0.02531297 0.0007525466 0.01736412 0.0005702312
## 28 28 0.03274722 0.18212918 0.02531023 0.0007550846 0.01780786 0.0005603116
## 29 29 0.03274248 0.18242858 0.02530561 0.0007520921 0.01870840 0.0005649725
## 30 30 0.03276122 0.18170121 0.02531808 0.0007626415 0.02145986 0.0005924937
## 31 31 0.03276700 0.18149081 0.02530708 0.0007718902 0.02159776 0.0006159969
## 32 32 0.03276724 0.18158825 0.02530714 0.0007508678 0.02098200 0.0005942022
## 33 33 0.03275745 0.18206617 0.02530168 0.0007722228 0.02075349 0.0006026849
## 34 34 0.03277003 0.18154548 0.02529327 0.0007861449 0.02004310 0.0006166167
## 35 35 0.03275854 0.18209049 0.02528278 0.0007758899 0.01936050 0.0006102044
## 36 36 0.03275283 0.18247046 0.02528710 0.0007772115 0.01974863 0.0006000074
## 37 37 0.03273280 0.18344212 0.02527515 0.0007851491 0.02020206 0.0006189663
## 38 38 0.03273120 0.18347258 0.02529402 0.0007659843 0.01880030 0.0006041981
## 39 39 0.03272612 0.18381833 0.02529317 0.0007709036 0.01920706 0.0005886535
## 40 40 0.03274881 0.18275508 0.02531486 0.0007545026 0.01847049 0.0005809108
## 41 41 0.03274517 0.18300083 0.02530796 0.0007733138 0.01867483 0.0005927297
## 42 42 0.03273334 0.18360593 0.02530827 0.0007869220 0.01914135 0.0005976127
## 43 43 0.03272596 0.18397989 0.02530937 0.0007812702 0.01921906 0.0006080321
## 44 44 0.03272226 0.18421630 0.02531490 0.0007869013 0.02027323 0.0006170151
## 45 45 0.03269645 0.18544399 0.02529149 0.0007960764 0.02066754 0.0006197800
## 46 46 0.03269297 0.18563802 0.02528636 0.0007819934 0.01976476 0.0006146125
## 47 47 0.03268554 0.18608130 0.02529066 0.0007865328 0.02077976 0.0006225590
## 48 48 0.03266589 0.18705427 0.02527464 0.0007731584 0.02069157 0.0006127158
## 49 49 0.03264013 0.18832802 0.02524665 0.0007636926 0.02184782 0.0006215270
## 50 50 0.03262778 0.18894371 0.02521976 0.0007605484 0.02251121 0.0006210315
## 51 51 0.03262757 0.18902082 0.02522083 0.0007627160 0.02276121 0.0006188413
## 52 52 0.03262166 0.18933341 0.02521727 0.0007484738 0.02278048 0.0006090828
## 53 53 0.03261484 0.18967629 0.02521303 0.0007582230 0.02247997 0.0006091574
## 54 54 0.03260950 0.18998952 0.02520686 0.0007862914 0.02306254 0.0006330481
## 55 55 0.03261712 0.18965289 0.02521829 0.0007981025 0.02309736 0.0006478188
## 56 56 0.03261504 0.18977405 0.02521836 0.0007912740 0.02325657 0.0006444193
## 57 57 0.03260563 0.19019966 0.02519422 0.0007786817 0.02276605 0.0006330119
## 58 58 0.03260971 0.19011302 0.02519044 0.0007662001 0.02333873 0.0006236315
## 59 59 0.03261659 0.18983970 0.02519206 0.0007642888 0.02318823 0.0006217051
## 60 60 0.03260440 0.19036368 0.02517719 0.0007718755 0.02282656 0.0006264902
## 61 61 0.03259218 0.19095432 0.02516626 0.0007821174 0.02307645 0.0006400026
## 62 62 0.03259303 0.19089492 0.02516661 0.0007900837 0.02252547 0.0006416166
## 63 63 0.03258217 0.19142166 0.02515979 0.0007718461 0.02203288 0.0006301853
## 64 64 0.03256364 0.19234287 0.02515018 0.0007856301 0.02318622 0.0006364208
## 65 65 0.03255628 0.19268552 0.02515685 0.0007904710 0.02308603 0.0006461861
## 66 66 0.03255017 0.19299142 0.02516438 0.0007833296 0.02289637 0.0006317919
## 67 67 0.03253868 0.19358761 0.02514991 0.0007654475 0.02272480 0.0006101303
## 68 68 0.03252983 0.19403305 0.02513689 0.0007669214 0.02300828 0.0006175003
## 69 69 0.03251754 0.19459792 0.02512558 0.0007722772 0.02305476 0.0006234664
## 70 70 0.03251374 0.19476325 0.02513282 0.0007733999 0.02350914 0.0006256036
## 71 71 0.03251851 0.19456018 0.02513564 0.0007831339 0.02363440 0.0006259255
## 72 72 0.03250841 0.19505513 0.02512635 0.0007881768 0.02377957 0.0006196110
## 73 73 0.03251057 0.19496552 0.02513355 0.0007819520 0.02370406 0.0006157345
## 74 74 0.03250952 0.19505437 0.02513277 0.0007692197 0.02352432 0.0006102482
## 75 75 0.03250184 0.19545061 0.02513201 0.0007715226 0.02360648 0.0006145732
## 76 76 0.03251050 0.19508395 0.02514031 0.0007712470 0.02404834 0.0006190926
## 77 77 0.03251979 0.19470836 0.02514972 0.0007812505 0.02450920 0.0006301492
## 78 78 0.03251731 0.19480519 0.02514165 0.0007872251 0.02364128 0.0006346228
## 79 79 0.03250661 0.19526827 0.02513272 0.0007804377 0.02308486 0.0006356887
## 80 80 0.03250361 0.19538470 0.02514052 0.0007756830 0.02287733 0.0006384733
## 81 81 0.03250243 0.19548840 0.02513783 0.0007809905 0.02329755 0.0006452859
## 82 82 0.03249485 0.19587348 0.02513612 0.0007899386 0.02382548 0.0006538916
## 83 83 0.03249926 0.19568511 0.02513794 0.0007977782 0.02357484 0.0006555270
## 84 84 0.03249764 0.19578373 0.02514249 0.0007905784 0.02335865 0.0006491801
## 85 85 0.03249020 0.19613918 0.02514446 0.0007926077 0.02387080 0.0006533251
## 86 86 0.03248392 0.19642923 0.02513816 0.0007842825 0.02364808 0.0006464797
## 87 87 0.03247319 0.19693759 0.02513056 0.0007750023 0.02379821 0.0006407801
## 88 88 0.03246894 0.19713148 0.02512504 0.0007738594 0.02377237 0.0006456344
## 89 89 0.03247160 0.19705435 0.02512687 0.0007722282 0.02375451 0.0006438975
## 90 90 0.03247634 0.19684827 0.02512544 0.0007744665 0.02377566 0.0006465266
## 91 91 0.03247165 0.19708338 0.02512616 0.0007754172 0.02394506 0.0006477233
## 92 92 0.03246893 0.19719213 0.02512729 0.0007647697 0.02394705 0.0006436013
## 93 93 0.03246443 0.19745046 0.02512434 0.0007639035 0.02452758 0.0006459812
## 94 94 0.03246587 0.19740582 0.02512102 0.0007673330 0.02469969 0.0006449982
## 95 95 0.03245853 0.19775077 0.02511543 0.0007698506 0.02489160 0.0006472834
## 96 96 0.03245107 0.19808767 0.02510572 0.0007747480 0.02498954 0.0006512270
## 97 97 0.03244268 0.19848268 0.02509407 0.0007751146 0.02514835 0.0006548558
## 98 98 0.03244221 0.19852521 0.02509709 0.0007768580 0.02539823 0.0006598683
## 99 99 0.03244029 0.19856494 0.02509394 0.0007763590 0.02517093 0.0006594982
## 100 100 0.03242578 0.19921787 0.02508321 0.0007759773 0.02504508 0.0006616697
## 101 101 0.03241853 0.19954239 0.02507774 0.0007758078 0.02518189 0.0006631844
## 102 102 0.03241591 0.19965453 0.02507708 0.0007729732 0.02517536 0.0006635895
## 103 103 0.03240499 0.20015232 0.02507482 0.0007739621 0.02525908 0.0006666102
## 104 104 0.03239982 0.20040189 0.02506789 0.0007649861 0.02520672 0.0006598198
## 105 105 0.03240117 0.20034924 0.02507373 0.0007630631 0.02543303 0.0006598189
## 106 106 0.03239840 0.20048181 0.02507193 0.0007612769 0.02531828 0.0006547925
## 107 107 0.03240175 0.20031187 0.02507593 0.0007638748 0.02515644 0.0006575391
## 108 108 0.03240294 0.20026608 0.02507538 0.0007644001 0.02500794 0.0006602883
## 109 109 0.03239873 0.20048456 0.02507125 0.0007654171 0.02519034 0.0006609220
## 110 110 0.03239459 0.20065839 0.02506665 0.0007677123 0.02476335 0.0006611028
## 111 111 0.03239797 0.20049773 0.02507160 0.0007696446 0.02482828 0.0006627744
## 112 112 0.03239876 0.20046701 0.02507007 0.0007747777 0.02492520 0.0006654006
## 113 113 0.03239941 0.20044980 0.02507223 0.0007748712 0.02513380 0.0006668409
## 114 114 0.03240234 0.20031797 0.02507529 0.0007735815 0.02505888 0.0006645252
## 115 115 0.03239845 0.20049766 0.02507418 0.0007672455 0.02488664 0.0006612124
## 116 116 0.03239582 0.20061452 0.02506970 0.0007696846 0.02493590 0.0006630723
## 117 117 0.03239498 0.20065914 0.02506741 0.0007719129 0.02510054 0.0006637116
## 118 118 0.03239517 0.20064767 0.02506808 0.0007733197 0.02507145 0.0006639964
## 119 119 0.03239868 0.20049399 0.02507132 0.0007750974 0.02505414 0.0006636076
## 120 120 0.03240049 0.20040514 0.02507188 0.0007687080 0.02492257 0.0006569604
## 121 121 0.03239859 0.20049541 0.02507214 0.0007692483 0.02497651 0.0006584755
## 122 122 0.03239859 0.20049254 0.02507250 0.0007697507 0.02495424 0.0006582021
## 123 123 0.03240069 0.20040237 0.02507444 0.0007664690 0.02492546 0.0006548281
## 124 124 0.03240037 0.20042131 0.02507205 0.0007687254 0.02502678 0.0006554686
## 125 125 0.03240377 0.20026253 0.02507517 0.0007683187 0.02501472 0.0006547100
## 126 126 0.03240443 0.20023559 0.02507431 0.0007652942 0.02497196 0.0006546665
## 127 127 0.03240335 0.20028453 0.02507365 0.0007657492 0.02497180 0.0006549634
## 128 128 0.03240499 0.20021101 0.02507490 0.0007648377 0.02495368 0.0006537676
## 129 129 0.03240521 0.20019865 0.02507561 0.0007641579 0.02499913 0.0006546046
## 130 130 0.03240618 0.20015443 0.02507559 0.0007641984 0.02501429 0.0006540456
## 131 131 0.03240604 0.20015733 0.02507552 0.0007640321 0.02503097 0.0006538033
## 132 132 0.03240666 0.20012907 0.02507585 0.0007640936 0.02504140 0.0006532104
## 133 133 0.03240726 0.20009977 0.02507666 0.0007639656 0.02504310 0.0006536631
## 134 134 0.03240786 0.20007536 0.02507707 0.0007641164 0.02504665 0.0006529795
## 135 135 0.03240758 0.20008570 0.02507672 0.0007642833 0.02504061 0.0006528860
## 136 136 0.03240816 0.20006010 0.02507680 0.0007643459 0.02505548 0.0006530196
## 137 137 0.03240795 0.20006894 0.02507642 0.0007643151 0.02506310 0.0006529651
## 138 138 0.03240803 0.20006564 0.02507649 0.0007645271 0.02506156 0.0006530139
## [1] "Best Model"
## nvmax
## 110 110
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## [1] "Coefficients of final model:"
## Estimate 2.5 % 97.5 %
## (Intercept) 2.0965368703 2.095697e+00 2.097377e+00
## PC1 -0.0009753762 -1.154814e-03 -7.959383e-04
## PC2 0.0012035314 1.005546e-03 1.401517e-03
## PC3 -0.0002767035 -4.948937e-04 -5.851338e-05
## PC4 0.0001683756 -5.192600e-05 3.886772e-04
## PC5 0.0006826866 4.583826e-04 9.069907e-04
## PC7 -0.0005099621 -7.392059e-04 -2.807183e-04
## PC8 -0.0002067595 -4.352876e-04 2.176856e-05
## PC9 -0.0002314497 -4.702156e-04 7.316330e-06
## PC10 0.0002611245 1.863521e-05 5.036139e-04
## PC11 -0.0013627613 -1.610508e-03 -1.115014e-03
## PC12 0.0003546051 1.072602e-04 6.019500e-04
## PC13 0.0004167756 1.611092e-04 6.724419e-04
## PC14 0.0015001118 1.246140e-03 1.754083e-03
## PC15 -0.0003584491 -6.189019e-04 -9.799628e-05
## PC16 0.0007904896 5.243205e-04 1.056659e-03
## PC17 -0.0001736328 -4.484116e-04 1.011461e-04
## PC18 0.0004064056 1.241334e-04 6.886777e-04
## PC19 -0.0003290534 -6.193746e-04 -3.873210e-05
## PC20 0.0009086458 6.043066e-04 1.212985e-03
## PC21 -0.0010243370 -1.335834e-03 -7.128398e-04
## PC22 0.0040105844 3.685930e-03 4.335238e-03
## PC23 -0.0005271298 -1.179198e-03 1.249380e-04
## PC24 -0.0014519611 -2.177307e-03 -7.266149e-04
## PC25 0.0009727363 2.537141e-04 1.691759e-03
## PC26 -0.0004758081 -1.200151e-03 2.485353e-04
## PC27 -0.0003703819 -1.098387e-03 3.576233e-04
## PC28 0.0007845835 5.403101e-05 1.515136e-03
## PC29 -0.0007201114 -1.451054e-03 1.083162e-05
## PC30 0.0006503129 -8.115961e-05 1.381786e-03
## PC31 -0.0003568879 -1.091877e-03 3.781014e-04
## PC32 0.0013278584 5.974717e-04 2.058245e-03
## PC36 0.0005845222 -1.552992e-04 1.324344e-03
## PC37 -0.0003920529 -1.130908e-03 3.468022e-04
## PC38 0.0004271293 -3.206515e-04 1.174910e-03
## PC39 0.0003401916 -4.101892e-04 1.090572e-03
## PC40 0.0003206294 -4.231597e-04 1.064418e-03
## PC41 -0.0007005179 -1.441132e-03 4.009582e-05
## PC42 -0.0005636011 -1.309979e-03 1.827765e-04
## PC43 0.0007001334 -5.084190e-05 1.451109e-03
## PC44 -0.0005123994 -1.259461e-03 2.346622e-04
## PC45 0.0012269099 4.783201e-04 1.975500e-03
## PC46 0.0015047325 7.527743e-04 2.256691e-03
## PC47 0.0005506639 -2.045439e-04 1.305872e-03
## PC50 -0.0008826750 -1.638728e-03 -1.266224e-04
## PC51 0.0005462567 -2.024028e-04 1.294916e-03
## PC52 0.0004098906 -3.434558e-04 1.163237e-03
## PC53 -0.0005576225 -1.319556e-03 2.043114e-04
## PC57 0.0004296186 -3.321397e-04 1.191377e-03
## PC58 -0.0005527647 -1.318095e-03 2.125653e-04
## PC59 0.0012702222 5.016661e-04 2.038778e-03
## PC60 -0.0008017669 -1.572844e-03 -3.068976e-05
## PC61 -0.0002846938 -1.062817e-03 4.934299e-04
## PC62 -0.0005640088 -1.331659e-03 2.036412e-04
## PC63 0.0006734582 -1.007575e-04 1.447674e-03
## PC65 0.0008926788 1.148954e-04 1.670462e-03
## PC67 0.0004050070 -3.737709e-04 1.183785e-03
## PC68 0.0008858627 1.080092e-04 1.663716e-03
## PC69 -0.0012529641 -2.032539e-03 -4.733894e-04
## PC71 -0.0008268591 -1.608376e-03 -4.534231e-05
## PC72 0.0008356291 5.699689e-05 1.614261e-03
## PC73 0.0005921479 -1.864336e-04 1.370729e-03
## PC75 0.0002538745 -5.370266e-04 1.044776e-03
## PC76 0.0016080450 8.253431e-04 2.390747e-03
## PC78 -0.0003849174 -1.174979e-03 4.051447e-04
## PC79 0.0008139782 2.750905e-05 1.600447e-03
## PC80 -0.0003822184 -1.170784e-03 4.063477e-04
## PC81 -0.0005777761 -1.373533e-03 2.179804e-04
## PC82 -0.0011160961 -1.905057e-03 -3.271354e-04
## PC83 0.0011666680 3.769274e-04 1.956409e-03
## PC85 0.0004159382 -3.784671e-04 1.210344e-03
## PC86 0.0005542520 -2.483770e-04 1.356881e-03
## PC87 0.0005841709 -2.138372e-04 1.382179e-03
## PC88 0.0004275564 -3.716779e-04 1.226791e-03
## PC89 0.0002772053 -5.162544e-04 1.070665e-03
## PC90 -0.0023723844 -3.170924e-03 -1.573844e-03
## PC91 0.0003996476 -3.990746e-04 1.198370e-03
## PC92 0.0002769611 -5.261904e-04 1.080113e-03
## PC94 0.0005395375 -2.654395e-04 1.344514e-03
## PC96 -0.0008826530 -1.691488e-03 -7.381836e-05
## PC97 -0.0003003734 -1.108344e-03 5.075967e-04
## PC98 0.0005875189 -2.247114e-04 1.399749e-03
## PC101 -0.0009914517 -1.799574e-03 -1.833293e-04
## PC102 0.0014484484 6.327960e-04 2.264101e-03
## PC104 0.0006386730 -1.761698e-04 1.453516e-03
## PC105 0.0002752159 -5.431010e-04 1.093533e-03
## PC106 0.0009514398 1.319122e-04 1.770967e-03
## PC107 0.0003922184 -4.250027e-04 1.209439e-03
## PC108 -0.0012110732 -2.030131e-03 -3.920157e-04
## PC109 -0.0003256071 -1.145878e-03 4.946643e-04
## PC110 -0.0004121828 -1.231737e-03 4.073715e-04
## PC111 -0.0008118751 -1.633126e-03 9.376251e-06
## PC112 0.0009103789 8.847136e-05 1.732287e-03
## PC114 -0.0003074862 -1.133391e-03 5.184185e-04
## PC115 -0.0006585795 -1.481574e-03 1.644146e-04
## PC117 -0.0016856941 -2.512477e-03 -8.589110e-04
## PC118 -0.0009465493 -1.775062e-03 -1.180364e-04
## PC119 -0.0008246670 -1.659911e-03 1.057664e-05
## PC120 -0.0005478879 -1.379798e-03 2.840228e-04
## PC121 0.0004080198 -4.288019e-04 1.244842e-03
## PC123 -0.0007145140 -1.551468e-03 1.224401e-04
## PC126 0.0010369835 1.990034e-04 1.874964e-03
## PC127 0.0010668332 2.360861e-04 1.897580e-03
## PC128 -0.0004767245 -1.320809e-03 3.673603e-04
## PC129 -0.0007336509 -1.574362e-03 1.070599e-04
## PC131 0.0011602279 3.167657e-04 2.003690e-03
## PC132 0.0015102884 6.702975e-04 2.350279e-03
## PC133 -0.0004892188 -1.335368e-03 3.569302e-04
## PC134 -0.0002865181 -1.131822e-03 5.587861e-04
## PC136 0.0006394666 -2.115833e-04 1.490516e-03
## PC138 0.0011348902 2.795909e-04 1.990190e-03
# Evaluate the backward-selection model on the held-out test set.
# isTRUE() is the idiomatic guard: it is safe even if the flag is NA or
# missing, unlike `== TRUE` (which yields NA and errors inside `if`).
if (isTRUE(algo.backward.caret)) {
  # `t` is assumed to be the response transformation set earlier in the
  # script (log transform of the output variable) — NOTE(review): confirm
  # it is not accidentally base::t.
  test.model(model.backward, data.test
             ,method = 'leapBackward', subopt = NULL
             ,formula = formula, feature.names = feature.names, label.names = label.names
             ,id = id
             ,draw.limits = TRUE, transformation = t)
}
## [1] "Summary of predicted values: "
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.033 2.086 2.098 2.097 2.110 2.159
## [1] "leapBackward Test MSE: 0.00103301010705246"
# Train a stepwise (sequential replacement) subset-selection model via
# caret's "leapSeq" method, using the project helper train.caret.glmselect.
# The seed is fixed so cross-validation folds are reproducible across runs.
if (isTRUE(algo.stepwise.caret)) {
  set.seed(1)
  returned <- train.caret.glmselect(formula = formula
                                    ,data = data.train
                                    ,method = "leapSeq"
                                    ,feature.names = feature.names)
  # Keep the fitted model and the run identifier for the test step below.
  model.stepwise <- returned$model
  id <- returned$id
}
## Aggregating results
## Selecting tuning parameters
## Fitting nvmax = 110 on full training set
## [1] "All models results"
## nvmax RMSE Rsquared MAE RMSESD RsquaredSD MAESD
## 1 1 0.03467963 0.08228409 0.02689668 0.0005837199 0.01755905 0.0002999811
## 2 2 0.03442479 0.09584815 0.02672288 0.0006696668 0.01801778 0.0003448861
## 3 3 0.03415987 0.11016571 0.02652062 0.0006149829 0.02064252 0.0004941685
## 4 4 0.03408152 0.11289736 0.02633266 0.0009629997 0.04348569 0.0007485688
## 5 5 0.03503216 0.06308580 0.02720604 0.0013437177 0.05333940 0.0009085872
## 6 6 0.03333923 0.15191631 0.02584422 0.0006931797 0.01936718 0.0005251475
## 7 7 0.03327598 0.15511291 0.02581860 0.0006641517 0.01899973 0.0005259697
## 8 8 0.03317918 0.15995957 0.02575309 0.0006320762 0.01732750 0.0005018923
## 9 9 0.03307505 0.16508344 0.02567850 0.0006160087 0.01698117 0.0004669829
## 10 10 0.03289696 0.17392287 0.02551290 0.0005889235 0.01709557 0.0004697173
## 11 11 0.03291761 0.17299007 0.02548476 0.0006006470 0.01761570 0.0004844542
## 12 12 0.03291500 0.17309136 0.02547065 0.0006001467 0.01673993 0.0004830773
## 13 13 0.03300691 0.16796378 0.02554539 0.0007434112 0.03151237 0.0005891436
## 14 14 0.03289115 0.17437202 0.02545558 0.0007018654 0.01725684 0.0005217945
## 15 15 0.03284130 0.17692317 0.02541656 0.0006895316 0.01819374 0.0005112121
## 16 16 0.03308062 0.16512590 0.02561277 0.0013482107 0.03814246 0.0009796056
## 17 17 0.03288050 0.17502441 0.02542037 0.0007001761 0.01707427 0.0005108211
## 18 18 0.03284653 0.17673743 0.02538417 0.0006608252 0.01623923 0.0004841676
## 19 19 0.03281035 0.17853299 0.02536574 0.0006804638 0.01691988 0.0004861180
## 20 20 0.03282852 0.17774842 0.02536737 0.0006985725 0.01802704 0.0005441927
## 21 21 0.03282387 0.17807758 0.02536963 0.0007020519 0.01899034 0.0005247229
## 22 22 0.03281721 0.17835409 0.02535781 0.0007478792 0.01765618 0.0005565969
## 23 23 0.03279416 0.17950156 0.02533902 0.0007710354 0.01681905 0.0005626326
## 24 24 0.03276334 0.18107409 0.02532165 0.0007719407 0.01813685 0.0005670953
## 25 25 0.03271591 0.18315678 0.02528615 0.0007809654 0.01798981 0.0005536892
## 26 26 0.03274555 0.18205508 0.02527989 0.0007743612 0.01720570 0.0005599813
## 27 27 0.03275617 0.18160642 0.02531297 0.0007525466 0.01736412 0.0005702312
## 28 28 0.03274722 0.18212918 0.02531023 0.0007550846 0.01780786 0.0005603116
## 29 29 0.03274907 0.18210413 0.02530666 0.0007472887 0.01808281 0.0005646727
## 30 30 0.03273195 0.18323515 0.02528217 0.0006921030 0.02197366 0.0005339153
## 31 31 0.03281139 0.17932949 0.02537001 0.0007478946 0.01851996 0.0006250217
## 32 32 0.03272805 0.18313790 0.02529497 0.0007598487 0.01950916 0.0005936236
## 33 33 0.03275751 0.18206275 0.02529869 0.0007721521 0.02074739 0.0006092603
## 34 34 0.03281716 0.17905227 0.02537900 0.0007640757 0.01810062 0.0006324991
## 35 35 0.03277224 0.18126512 0.02532581 0.0007696378 0.02026166 0.0006159179
## 36 36 0.03277967 0.18072391 0.02534971 0.0007772105 0.01945689 0.0005717192
## 37 37 0.03276149 0.18203072 0.02528329 0.0007559306 0.01799419 0.0006032075
## 38 38 0.03272513 0.18377459 0.02528503 0.0007688132 0.01902732 0.0006127174
## 39 39 0.03275369 0.18233881 0.02534653 0.0007590034 0.02089033 0.0005952164
## 40 40 0.03279530 0.18038476 0.02536128 0.0007477546 0.01826648 0.0005568410
## 41 41 0.03274199 0.18284460 0.02532289 0.0007728725 0.01926670 0.0006113832
## 42 42 0.03274882 0.18287806 0.02534019 0.0007832935 0.01926607 0.0006007436
## 43 43 0.03268881 0.18507042 0.02529678 0.0007869534 0.01895837 0.0006094646
## 44 44 0.03272616 0.18403239 0.02532161 0.0007862788 0.02007694 0.0006158943
## 45 45 0.03270987 0.18478282 0.02530714 0.0008094715 0.02062010 0.0006284692
## 46 46 0.03269297 0.18563802 0.02528636 0.0007819934 0.01976476 0.0006146125
## 47 47 0.03269100 0.18585816 0.02529231 0.0007841223 0.02078207 0.0006221532
## 48 48 0.03266589 0.18705427 0.02527464 0.0007731584 0.02069157 0.0006127158
## 49 49 0.03264800 0.18796071 0.02525149 0.0007839952 0.02194731 0.0006314512
## 50 50 0.03264159 0.18832484 0.02523095 0.0007536182 0.02209447 0.0006088194
## 51 51 0.03261166 0.18984262 0.02519236 0.0007233352 0.02282395 0.0005662149
## 52 52 0.03262634 0.18879036 0.02521499 0.0007468191 0.02291659 0.0006093385
## 53 53 0.03261722 0.18954526 0.02521700 0.0007572466 0.02261241 0.0006094490
## 54 54 0.03260484 0.19020910 0.02520390 0.0007749114 0.02304499 0.0006273369
## 55 55 0.03268563 0.18609832 0.02531057 0.0007768144 0.02292621 0.0006591156
## 56 56 0.03263734 0.18869120 0.02523485 0.0007436321 0.02195299 0.0006129892
## 57 57 0.03260563 0.19019966 0.02519422 0.0007786817 0.02276605 0.0006330119
## 58 58 0.03262279 0.18912175 0.02522090 0.0007623225 0.02359935 0.0006193823
## 59 59 0.03261659 0.18983970 0.02519206 0.0007642888 0.02318823 0.0006217051
## 60 60 0.03261815 0.18947035 0.02520524 0.0007910854 0.02435107 0.0006697634
## 61 61 0.03263753 0.18860109 0.02522428 0.0007681604 0.02250355 0.0006063660
## 62 62 0.03259303 0.19089492 0.02516661 0.0007900837 0.02252547 0.0006416166
## 63 63 0.03260712 0.19006653 0.02520203 0.0007652984 0.02062956 0.0006443995
## 64 64 0.03256364 0.19234287 0.02515018 0.0007856301 0.02318622 0.0006364208
## 65 65 0.03259881 0.19048670 0.02519839 0.0007783919 0.02546733 0.0006558452
## 66 66 0.03256486 0.19190649 0.02518116 0.0007790796 0.02326214 0.0006294067
## 67 67 0.03258032 0.19153039 0.02516757 0.0007223986 0.01975966 0.0005859326
## 68 68 0.03259155 0.19093304 0.02518827 0.0007518598 0.02226614 0.0005874343
## 69 69 0.03252479 0.19424156 0.02512988 0.0007714158 0.02294470 0.0006242369
## 70 70 0.03251497 0.19470830 0.02513533 0.0007730129 0.02352078 0.0006247728
## 71 71 0.03253124 0.19358899 0.02515974 0.0007794250 0.02395276 0.0006210460
## 72 72 0.03250892 0.19502944 0.02512512 0.0007879031 0.02374897 0.0006199484
## 73 73 0.03253162 0.19354765 0.02515807 0.0007759306 0.02378695 0.0006109187
## 74 74 0.03250952 0.19505437 0.02513277 0.0007692197 0.02352432 0.0006102482
## 75 75 0.03254705 0.19325111 0.02518909 0.0007605011 0.02200360 0.0006287970
## 76 76 0.03250992 0.19510979 0.02513718 0.0007715749 0.02407926 0.0006198025
## 77 77 0.03252045 0.19450464 0.02515398 0.0007826862 0.02471267 0.0006313894
## 78 78 0.03255342 0.19293584 0.02518034 0.0008369407 0.02684396 0.0006860392
## 79 79 0.03249980 0.19559464 0.02513026 0.0007885672 0.02369816 0.0006374617
## 80 80 0.03251423 0.19458928 0.02516449 0.0007694079 0.02309650 0.0006266216
## 81 81 0.03256069 0.19157312 0.02521780 0.0007191655 0.01805450 0.0006124557
## 82 82 0.03249658 0.19570897 0.02514157 0.0007900282 0.02378043 0.0006571477
## 83 83 0.03246234 0.19683739 0.02511635 0.0007950765 0.02225026 0.0006504797
## 84 84 0.03249505 0.19589221 0.02513827 0.0007920136 0.02344032 0.0006532950
## 85 85 0.03251394 0.19496836 0.02515540 0.0007616938 0.02181309 0.0006304042
## 86 86 0.03254647 0.19342722 0.02520713 0.0007745715 0.02335332 0.0006141731
## 87 87 0.03247319 0.19693759 0.02513056 0.0007750023 0.02379821 0.0006407801
## 88 88 0.03246894 0.19713148 0.02512504 0.0007738594 0.02377237 0.0006456344
## 89 89 0.03253488 0.19400802 0.02519850 0.0007594610 0.02326180 0.0006076405
## 90 90 0.03249498 0.19591099 0.02514641 0.0007754728 0.02364027 0.0006558339
## 91 91 0.03247165 0.19708338 0.02512616 0.0007754172 0.02394506 0.0006477233
## 92 92 0.03241849 0.19905414 0.02509471 0.0007631796 0.02195452 0.0006390964
## 93 93 0.03246395 0.19745474 0.02512683 0.0007698467 0.02471194 0.0006544658
## 94 94 0.03248771 0.19632375 0.02514645 0.0007678721 0.02450056 0.0006566466
## 95 95 0.03251857 0.19436863 0.02521454 0.0007758129 0.02538281 0.0006053793
## 96 96 0.03244968 0.19814110 0.02510522 0.0007729884 0.02491682 0.0006505106
## 97 97 0.03244112 0.19852623 0.02509347 0.0007731527 0.02508986 0.0006539841
## 98 98 0.03244221 0.19852521 0.02509709 0.0007768580 0.02539823 0.0006598683
## 99 99 0.03245850 0.19754605 0.02510645 0.0008006886 0.02667332 0.0006781497
## 100 100 0.03242578 0.19921787 0.02508321 0.0007759773 0.02504508 0.0006616697
## 101 101 0.03241853 0.19954239 0.02507774 0.0007758078 0.02518189 0.0006631844
## 102 102 0.03241591 0.19965453 0.02507708 0.0007729732 0.02517536 0.0006635895
## 103 103 0.03240460 0.19995127 0.02509631 0.0007741166 0.02528635 0.0006606995
## 104 104 0.03239982 0.20040189 0.02506789 0.0007649861 0.02520672 0.0006598198
## 105 105 0.03240117 0.20034924 0.02507373 0.0007630631 0.02543303 0.0006598189
## 106 106 0.03241747 0.19955917 0.02510239 0.0007518640 0.02478953 0.0006274008
## 107 107 0.03240315 0.20024726 0.02507623 0.0007630758 0.02517725 0.0006575062
## 108 108 0.03240294 0.20026608 0.02507538 0.0007644001 0.02500794 0.0006602883
## 109 109 0.03245428 0.19768866 0.02510481 0.0007482636 0.02745015 0.0006638287
## 110 110 0.03237410 0.20125291 0.02507886 0.0007597344 0.02333334 0.0006393645
## 111 111 0.03239716 0.20053929 0.02507084 0.0007686129 0.02477107 0.0006616613
## 112 112 0.03239876 0.20046701 0.02507007 0.0007747777 0.02492520 0.0006654006
## 113 113 0.03240335 0.20004254 0.02507714 0.0008094196 0.02616571 0.0006473598
## 114 114 0.03240234 0.20031797 0.02507529 0.0007735815 0.02505888 0.0006645252
## 115 115 0.03239845 0.20049766 0.02507418 0.0007672455 0.02488664 0.0006612124
## 116 116 0.03243778 0.19848542 0.02510502 0.0007390359 0.02488888 0.0006379899
## 117 117 0.03241519 0.19968706 0.02509255 0.0007712642 0.02483248 0.0006749391
## 118 118 0.03239806 0.20047124 0.02507719 0.0007717110 0.02495168 0.0006550911
## 119 119 0.03237917 0.20123192 0.02507658 0.0007823009 0.02485415 0.0006572140
## 120 120 0.03240964 0.19994448 0.02506974 0.0007528330 0.02472773 0.0006328649
## 121 121 0.03239122 0.20083773 0.02504770 0.0007517740 0.02500886 0.0006213507
## 122 122 0.03239859 0.20049254 0.02507250 0.0007697507 0.02495424 0.0006582021
## 123 123 0.03240069 0.20040237 0.02507444 0.0007664690 0.02492546 0.0006548281
## 124 124 0.03241151 0.19987427 0.02507401 0.0007632399 0.02526600 0.0006553041
## 125 125 0.03240377 0.20026253 0.02507517 0.0007683187 0.02501472 0.0006547100
## 126 126 0.03240443 0.20023559 0.02507431 0.0007652942 0.02497196 0.0006546665
## 127 127 0.03241569 0.19969256 0.02507426 0.0007477748 0.02388997 0.0006536403
## 128 128 0.03242435 0.19918485 0.02507819 0.0007576491 0.02575903 0.0006528819
## 129 129 0.03239010 0.20086169 0.02507736 0.0007694344 0.02488871 0.0006540729
## 130 130 0.03242529 0.19915855 0.02506664 0.0007129767 0.02515401 0.0006436195
## 131 131 0.03240629 0.20003360 0.02506883 0.0007578965 0.02515077 0.0006404516
## 132 132 0.03240423 0.20018304 0.02508393 0.0007626164 0.02470996 0.0006432310
## 133 133 0.03240670 0.20008861 0.02507276 0.0007664146 0.02500670 0.0006449905
## 134 134 0.03241419 0.19978348 0.02508616 0.0007606539 0.02489179 0.0006421327
## 135 135 0.03241892 0.19952758 0.02508675 0.0007632568 0.02486841 0.0006372161
## 136 136 0.03240766 0.20008603 0.02507608 0.0007652442 0.02523859 0.0006522533
## 137 137 0.03240520 0.20019252 0.02507403 0.0007642818 0.02511679 0.0006516122
## 138 138 0.03240803 0.20006564 0.02507649 0.0007645271 0.02506156 0.0006530139
## [1] "Best Model"
## nvmax
## 110 110
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## [1] "Coefficients of final model:"
## Estimate 2.5 % 97.5 %
## (Intercept) 2.0965368703 2.095697e+00 2.097377e+00
## PC1 -0.0009753762 -1.154814e-03 -7.959383e-04
## PC2 0.0012035314 1.005546e-03 1.401517e-03
## PC3 -0.0002767035 -4.948937e-04 -5.851338e-05
## PC4 0.0001683756 -5.192600e-05 3.886772e-04
## PC5 0.0006826866 4.583826e-04 9.069907e-04
## PC7 -0.0005099621 -7.392059e-04 -2.807183e-04
## PC8 -0.0002067595 -4.352876e-04 2.176856e-05
## PC9 -0.0002314497 -4.702156e-04 7.316330e-06
## PC10 0.0002611245 1.863521e-05 5.036139e-04
## PC11 -0.0013627613 -1.610508e-03 -1.115014e-03
## PC12 0.0003546051 1.072602e-04 6.019500e-04
## PC13 0.0004167756 1.611092e-04 6.724419e-04
## PC14 0.0015001118 1.246140e-03 1.754083e-03
## PC15 -0.0003584491 -6.189019e-04 -9.799628e-05
## PC16 0.0007904896 5.243205e-04 1.056659e-03
## PC17 -0.0001736328 -4.484116e-04 1.011461e-04
## PC18 0.0004064056 1.241334e-04 6.886777e-04
## PC19 -0.0003290534 -6.193746e-04 -3.873210e-05
## PC20 0.0009086458 6.043066e-04 1.212985e-03
## PC21 -0.0010243370 -1.335834e-03 -7.128398e-04
## PC22 0.0040105844 3.685930e-03 4.335238e-03
## PC23 -0.0005271298 -1.179198e-03 1.249380e-04
## PC24 -0.0014519611 -2.177307e-03 -7.266149e-04
## PC25 0.0009727363 2.537141e-04 1.691759e-03
## PC26 -0.0004758081 -1.200151e-03 2.485353e-04
## PC27 -0.0003703819 -1.098387e-03 3.576233e-04
## PC28 0.0007845835 5.403101e-05 1.515136e-03
## PC29 -0.0007201114 -1.451054e-03 1.083162e-05
## PC30 0.0006503129 -8.115961e-05 1.381786e-03
## PC31 -0.0003568879 -1.091877e-03 3.781014e-04
## PC32 0.0013278584 5.974717e-04 2.058245e-03
## PC36 0.0005845222 -1.552992e-04 1.324344e-03
## PC37 -0.0003920529 -1.130908e-03 3.468022e-04
## PC38 0.0004271293 -3.206515e-04 1.174910e-03
## PC39 0.0003401916 -4.101892e-04 1.090572e-03
## PC40 0.0003206294 -4.231597e-04 1.064418e-03
## PC41 -0.0007005179 -1.441132e-03 4.009582e-05
## PC42 -0.0005636011 -1.309979e-03 1.827765e-04
## PC43 0.0007001334 -5.084190e-05 1.451109e-03
## PC44 -0.0005123994 -1.259461e-03 2.346622e-04
## PC45 0.0012269099 4.783201e-04 1.975500e-03
## PC46 0.0015047325 7.527743e-04 2.256691e-03
## PC47 0.0005506639 -2.045439e-04 1.305872e-03
## PC50 -0.0008826750 -1.638728e-03 -1.266224e-04
## PC51 0.0005462567 -2.024028e-04 1.294916e-03
## PC52 0.0004098906 -3.434558e-04 1.163237e-03
## PC53 -0.0005576225 -1.319556e-03 2.043114e-04
## PC57 0.0004296186 -3.321397e-04 1.191377e-03
## PC58 -0.0005527647 -1.318095e-03 2.125653e-04
## PC59 0.0012702222 5.016661e-04 2.038778e-03
## PC60 -0.0008017669 -1.572844e-03 -3.068976e-05
## PC61 -0.0002846938 -1.062817e-03 4.934299e-04
## PC62 -0.0005640088 -1.331659e-03 2.036412e-04
## PC63 0.0006734582 -1.007575e-04 1.447674e-03
## PC65 0.0008926788 1.148954e-04 1.670462e-03
## PC67 0.0004050070 -3.737709e-04 1.183785e-03
## PC68 0.0008858627 1.080092e-04 1.663716e-03
## PC69 -0.0012529641 -2.032539e-03 -4.733894e-04
## PC71 -0.0008268591 -1.608376e-03 -4.534231e-05
## PC72 0.0008356291 5.699689e-05 1.614261e-03
## PC73 0.0005921479 -1.864336e-04 1.370729e-03
## PC75 0.0002538745 -5.370266e-04 1.044776e-03
## PC76 0.0016080450 8.253431e-04 2.390747e-03
## PC78 -0.0003849174 -1.174979e-03 4.051447e-04
## PC79 0.0008139782 2.750905e-05 1.600447e-03
## PC80 -0.0003822184 -1.170784e-03 4.063477e-04
## PC81 -0.0005777761 -1.373533e-03 2.179804e-04
## PC82 -0.0011160961 -1.905057e-03 -3.271354e-04
## PC83 0.0011666680 3.769274e-04 1.956409e-03
## PC85 0.0004159382 -3.784671e-04 1.210344e-03
## PC86 0.0005542520 -2.483770e-04 1.356881e-03
## PC87 0.0005841709 -2.138372e-04 1.382179e-03
## PC88 0.0004275564 -3.716779e-04 1.226791e-03
## PC89 0.0002772053 -5.162544e-04 1.070665e-03
## PC90 -0.0023723844 -3.170924e-03 -1.573844e-03
## PC91 0.0003996476 -3.990746e-04 1.198370e-03
## PC92 0.0002769611 -5.261904e-04 1.080113e-03
## PC94 0.0005395375 -2.654395e-04 1.344514e-03
## PC96 -0.0008826530 -1.691488e-03 -7.381836e-05
## PC97 -0.0003003734 -1.108344e-03 5.075967e-04
## PC98 0.0005875189 -2.247114e-04 1.399749e-03
## PC101 -0.0009914517 -1.799574e-03 -1.833293e-04
## PC102 0.0014484484 6.327960e-04 2.264101e-03
## PC104 0.0006386730 -1.761698e-04 1.453516e-03
## PC105 0.0002752159 -5.431010e-04 1.093533e-03
## PC106 0.0009514398 1.319122e-04 1.770967e-03
## PC107 0.0003922184 -4.250027e-04 1.209439e-03
## PC108 -0.0012110732 -2.030131e-03 -3.920157e-04
## PC109 -0.0003256071 -1.145878e-03 4.946643e-04
## PC110 -0.0004121828 -1.231737e-03 4.073715e-04
## PC111 -0.0008118751 -1.633126e-03 9.376251e-06
## PC112 0.0009103789 8.847136e-05 1.732287e-03
## PC114 -0.0003074862 -1.133391e-03 5.184185e-04
## PC115 -0.0006585795 -1.481574e-03 1.644146e-04
## PC117 -0.0016856941 -2.512477e-03 -8.589110e-04
## PC118 -0.0009465493 -1.775062e-03 -1.180364e-04
## PC119 -0.0008246670 -1.659911e-03 1.057664e-05
## PC120 -0.0005478879 -1.379798e-03 2.840228e-04
## PC121 0.0004080198 -4.288019e-04 1.244842e-03
## PC123 -0.0007145140 -1.551468e-03 1.224401e-04
## PC126 0.0010369835 1.990034e-04 1.874964e-03
## PC127 0.0010668332 2.360861e-04 1.897580e-03
## PC128 -0.0004767245 -1.320809e-03 3.673603e-04
## PC129 -0.0007336509 -1.574362e-03 1.070599e-04
## PC131 0.0011602279 3.167657e-04 2.003690e-03
## PC132 0.0015102884 6.702975e-04 2.350279e-03
## PC133 -0.0004892188 -1.335368e-03 3.569302e-04
## PC134 -0.0002865181 -1.131822e-03 5.587861e-04
## PC136 0.0006394666 -2.115833e-04 1.490516e-03
## PC138 0.0011348902 2.795909e-04 1.990190e-03
# Evaluate the stepwise-selection model on the held-out test set.
# isTRUE() guards correctly against an NA or unset flag, whereas
# `flag == TRUE` would propagate NA into `if` and error.
if (isTRUE(algo.stepwise.caret)) {
  # `t` is assumed to be the response transformation defined earlier in
  # the script — NOTE(review): confirm it is not base::t.
  test.model(model.stepwise, data.test
             ,method = 'leapSeq', subopt = NULL
             ,formula = formula, feature.names = feature.names, label.names = label.names
             ,id = id
             ,draw.limits = TRUE, transformation = t)
}
## [1] "Summary of predicted values: "
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.033 2.086 2.098 2.097 2.110 2.159
## [1] "leapSeq Test MSE: 0.00103301010705247"
# Train a LASSO model (glmnet with alpha = 1, i.e. pure L1 penalty),
# tuning lambda over a log-spaced grid from 1e-4 to 1e-2.
# The seed is fixed so cross-validation folds are reproducible.
if (isTRUE(algo.LASSO.caret)) {
  set.seed(1)
  # alpha = 1 selects the LASSO end of the elastic-net family; 100
  # log-spaced lambda values cover two orders of magnitude.
  tune.grid <- expand.grid(alpha = 1,
                           lambda = 10^seq(from = -4, to = -2, length = 100))
  returned <- train.caret.glmselect(formula = formula
                                    ,data = data.train
                                    ,method = "glmnet"
                                    ,subopt = 'LASSO'
                                    ,tune.grid = tune.grid
                                    ,feature.names = feature.names)
  model.LASSO.caret <- returned$model
}
## Aggregating results
## Selecting tuning parameters
## Fitting alpha = 1, lambda = 0.000145 on full training set
## glmnet
##
## 5584 samples
## 138 predictor
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 5026, 5026, 5026, 5025, 5025, 5026, ...
## Resampling results across tuning parameters:
##
## lambda RMSE Rsquared MAE
## 0.0001000000 0.03236190 0.20105937 0.02505164
## 0.0001047616 0.03236087 0.20107268 0.02505108
## 0.0001097499 0.03235987 0.20108411 0.02505059
## 0.0001149757 0.03235902 0.20108916 0.02505025
## 0.0001204504 0.03235824 0.20109125 0.02504994
## 0.0001261857 0.03235762 0.20108618 0.02504978
## 0.0001321941 0.03235714 0.20107529 0.02504983
## 0.0001384886 0.03235684 0.20105672 0.02505005
## 0.0001450829 0.03235671 0.20103063 0.02505047
## 0.0001519911 0.03235672 0.20099938 0.02505103
## 0.0001592283 0.03235693 0.20095987 0.02505166
## 0.0001668101 0.03235731 0.20091382 0.02505242
## 0.0001747528 0.03235787 0.20086176 0.02505337
## 0.0001830738 0.03235863 0.20080214 0.02505447
## 0.0001917910 0.03235968 0.20073138 0.02505567
## 0.0002009233 0.03236096 0.20065325 0.02505697
## 0.0002104904 0.03236269 0.20055655 0.02505866
## 0.0002205131 0.03236483 0.20044367 0.02506067
## 0.0002310130 0.03236750 0.20030962 0.02506310
## 0.0002420128 0.03237082 0.20014903 0.02506586
## 0.0002535364 0.03237460 0.19997140 0.02506892
## 0.0002656088 0.03237883 0.19977786 0.02507257
## 0.0002782559 0.03238329 0.19958041 0.02507641
## 0.0002915053 0.03238812 0.19937239 0.02508088
## 0.0003053856 0.03239347 0.19914718 0.02508587
## 0.0003199267 0.03239945 0.19889884 0.02509131
## 0.0003351603 0.03240619 0.19862224 0.02509721
## 0.0003511192 0.03241335 0.19833494 0.02510323
## 0.0003678380 0.03242122 0.19802277 0.02510972
## 0.0003853529 0.03242955 0.19769932 0.02511616
## 0.0004037017 0.03243891 0.19733726 0.02512347
## 0.0004229243 0.03244931 0.19693628 0.02513134
## 0.0004430621 0.03246079 0.19649531 0.02514009
## 0.0004641589 0.03247308 0.19602820 0.02514951
## 0.0004862602 0.03248691 0.19549774 0.02516117
## 0.0005094138 0.03250204 0.19491787 0.02517408
## 0.0005336699 0.03251877 0.19427109 0.02518844
## 0.0005590810 0.03253658 0.19358637 0.02520341
## 0.0005857021 0.03255636 0.19281423 0.02522000
## 0.0006135907 0.03257792 0.19196344 0.02523807
## 0.0006428073 0.03260141 0.19102673 0.02525775
## 0.0006734151 0.03262631 0.19003400 0.02527958
## 0.0007054802 0.03265334 0.18894158 0.02530313
## 0.0007390722 0.03268001 0.18788934 0.02532709
## 0.0007742637 0.03270867 0.18674610 0.02535227
## 0.0008111308 0.03273750 0.18561151 0.02537684
## 0.0008497534 0.03276797 0.18440397 0.02540308
## 0.0008902151 0.03279810 0.18323798 0.02542870
## 0.0009326033 0.03282889 0.18206251 0.02545510
## 0.0009770100 0.03285878 0.18096998 0.02547978
## 0.0010235310 0.03289140 0.17974716 0.02550598
## 0.0010722672 0.03292430 0.17854335 0.02553380
## 0.0011233240 0.03296029 0.17717564 0.02556356
## 0.0011768120 0.03299728 0.17576951 0.02559499
## 0.0012328467 0.03303672 0.17423923 0.02562847
## 0.0012915497 0.03307535 0.17278254 0.02566136
## 0.0013530478 0.03311624 0.17121533 0.02569607
## 0.0014174742 0.03315617 0.16973972 0.02572815
## 0.0014849683 0.03319890 0.16812391 0.02576203
## 0.0015556761 0.03324158 0.16654687 0.02579542
## 0.0016297508 0.03328685 0.16484952 0.02583098
## 0.0017073526 0.03333220 0.16320084 0.02586724
## 0.0017886495 0.03338000 0.16144753 0.02590568
## 0.0018738174 0.03342712 0.15978295 0.02594193
## 0.0019630407 0.03347856 0.15788606 0.02598108
## 0.0020565123 0.03353399 0.15575811 0.02602244
## 0.0021544347 0.03359369 0.15337352 0.02606660
## 0.0022570197 0.03365683 0.15076893 0.02611286
## 0.0023644894 0.03372304 0.14797064 0.02616167
## 0.0024770764 0.03378617 0.14542029 0.02620831
## 0.0025950242 0.03384906 0.14294419 0.02625508
## 0.0027185882 0.03390323 0.14121072 0.02629583
## 0.0028480359 0.03395991 0.13937395 0.02633826
## 0.0029836472 0.03401841 0.13750608 0.02638257
## 0.0031257158 0.03408236 0.13530288 0.02643130
## 0.0032745492 0.03415226 0.13269500 0.02648415
## 0.0034304693 0.03422882 0.12957397 0.02654150
## 0.0035938137 0.03431265 0.12582703 0.02660394
## 0.0037649358 0.03440441 0.12131819 0.02667220
## 0.0039442061 0.03450485 0.11588703 0.02674552
## 0.0041320124 0.03461439 0.10938613 0.02682428
## 0.0043287613 0.03473277 0.10172545 0.02691068
## 0.0045348785 0.03485016 0.09386179 0.02699699
## 0.0047508102 0.03494206 0.08810816 0.02706372
## 0.0049770236 0.03502160 0.08351759 0.02712016
## 0.0052140083 0.03506365 0.08268736 0.02714938
## 0.0054622772 0.03510491 0.08228434 0.02717880
## 0.0057223677 0.03514622 0.08228409 0.02720772
## 0.0059948425 0.03519151 0.08228409 0.02723899
## 0.0062802914 0.03524115 0.08228409 0.02727347
## 0.0065793322 0.03529556 0.08228409 0.02731186
## 0.0068926121 0.03535518 0.08228409 0.02735411
## 0.0072208090 0.03542051 0.08228409 0.02740023
## 0.0075646333 0.03549207 0.08228409 0.02745106
## 0.0079248290 0.03557046 0.08228409 0.02750742
## 0.0083021757 0.03565629 0.08228409 0.02756928
## 0.0086974900 0.03575027 0.08228409 0.02763929
## 0.0091116276 0.03585313 0.08228409 0.02771663
## 0.0095454846 0.03596570 0.08228409 0.02780066
## 0.0100000000 0.03608884 0.08228409 0.02789191
##
## Tuning parameter 'alpha' was held constant at a value of 1
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were alpha = 1 and lambda = 0.0001450829.
## alpha lambda
## 9 1 0.0001450829
## alpha lambda RMSE Rsquared MAE RMSESD RsquaredSD MAESD
## 1 1 0.0001000000 0.03236190 0.20105937 0.02505164 0.0007599981 0.02418576 0.0006347056
## 2 1 0.0001047616 0.03236087 0.20107268 0.02505108 0.0007597587 0.02414405 0.0006337053
## 3 1 0.0001097499 0.03235987 0.20108411 0.02505059 0.0007594801 0.02410002 0.0006326823
## 4 1 0.0001149757 0.03235902 0.20108916 0.02505025 0.0007591934 0.02405581 0.0006316629
## 5 1 0.0001204504 0.03235824 0.20109125 0.02504994 0.0007588221 0.02400944 0.0006306606
## 6 1 0.0001261857 0.03235762 0.20108618 0.02504978 0.0007584236 0.02396174 0.0006296174
## 7 1 0.0001321941 0.03235714 0.20107529 0.02504983 0.0007579951 0.02391067 0.0006282957
## 8 1 0.0001384886 0.03235684 0.20105672 0.02505005 0.0007575193 0.02385592 0.0006268758
## 9 1 0.0001450829 0.03235671 0.20103063 0.02505047 0.0007570231 0.02379725 0.0006254009
## 10 1 0.0001519911 0.03235672 0.20099938 0.02505103 0.0007565016 0.02373705 0.0006238238
## 11 1 0.0001592283 0.03235693 0.20095987 0.02505166 0.0007559951 0.02367192 0.0006221452
## 12 1 0.0001668101 0.03235731 0.20091382 0.02505242 0.0007554487 0.02359960 0.0006203742
## 13 1 0.0001747528 0.03235787 0.20086176 0.02505337 0.0007547587 0.02352493 0.0006185562
## 14 1 0.0001830738 0.03235863 0.20080214 0.02505447 0.0007540248 0.02344416 0.0006163596
## 15 1 0.0001917910 0.03235968 0.20073138 0.02505567 0.0007532908 0.02335947 0.0006139964
## 16 1 0.0002009233 0.03236096 0.20065325 0.02505697 0.0007524909 0.02326688 0.0006114605
## 17 1 0.0002104904 0.03236269 0.20055655 0.02505866 0.0007518047 0.02316899 0.0006087151
## 18 1 0.0002205131 0.03236483 0.20044367 0.02506067 0.0007510809 0.02306587 0.0006059799
## 19 1 0.0002310130 0.03236750 0.20030962 0.02506310 0.0007505097 0.02295739 0.0006030041
## 20 1 0.0002420128 0.03237082 0.20014903 0.02506586 0.0007500482 0.02285112 0.0005999298
## 21 1 0.0002535364 0.03237460 0.19997140 0.02506892 0.0007494675 0.02273865 0.0005967893
## 22 1 0.0002656088 0.03237883 0.19977786 0.02507257 0.0007488625 0.02260706 0.0005940851
## 23 1 0.0002782559 0.03238329 0.19958041 0.02507641 0.0007475076 0.02247005 0.0005910390
## 24 1 0.0002915053 0.03238812 0.19937239 0.02508088 0.0007462742 0.02232794 0.0005880350
## 25 1 0.0003053856 0.03239347 0.19914718 0.02508587 0.0007448160 0.02217993 0.0005846457
## 26 1 0.0003199267 0.03239945 0.19889884 0.02509131 0.0007434012 0.02202899 0.0005814037
## 27 1 0.0003351603 0.03240619 0.19862224 0.02509721 0.0007416965 0.02187253 0.0005779455
## 28 1 0.0003511192 0.03241335 0.19833494 0.02510323 0.0007399613 0.02169278 0.0005744786
## 29 1 0.0003678380 0.03242122 0.19802277 0.02510972 0.0007378196 0.02150320 0.0005701840
## 30 1 0.0003853529 0.03242955 0.19769932 0.02511616 0.0007357252 0.02129322 0.0005658572
## 31 1 0.0004037017 0.03243891 0.19733726 0.02512347 0.0007331974 0.02106920 0.0005611342
## 32 1 0.0004229243 0.03244931 0.19693628 0.02513134 0.0007308006 0.02084388 0.0005561445
## 33 1 0.0004430621 0.03246079 0.19649531 0.02514009 0.0007276888 0.02061046 0.0005510806
## 34 1 0.0004641589 0.03247308 0.19602820 0.02514951 0.0007246315 0.02037584 0.0005462623
## 35 1 0.0004862602 0.03248691 0.19549774 0.02516117 0.0007216637 0.02014482 0.0005414703
## 36 1 0.0005094138 0.03250204 0.19491787 0.02517408 0.0007190359 0.01994399 0.0005370409
## 37 1 0.0005336699 0.03251877 0.19427109 0.02518844 0.0007165231 0.01974554 0.0005321195
## 38 1 0.0005590810 0.03253658 0.19358637 0.02520341 0.0007145125 0.01954877 0.0005277197
## 39 1 0.0005857021 0.03255636 0.19281423 0.02522000 0.0007124107 0.01934101 0.0005234516
## 40 1 0.0006135907 0.03257792 0.19196344 0.02523807 0.0007107217 0.01911139 0.0005196218
## 41 1 0.0006428073 0.03260141 0.19102673 0.02525775 0.0007084506 0.01888449 0.0005141496
## 42 1 0.0006734151 0.03262631 0.19003400 0.02527958 0.0007063343 0.01868250 0.0005084231
## 43 1 0.0007054802 0.03265334 0.18894158 0.02530313 0.0007039939 0.01849979 0.0005016792
## 44 1 0.0007390722 0.03268001 0.18788934 0.02532709 0.0007023441 0.01830260 0.0004953880
## 45 1 0.0007742637 0.03270867 0.18674610 0.02535227 0.0006994833 0.01813962 0.0004879891
## 46 1 0.0008111308 0.03273750 0.18561151 0.02537684 0.0006975315 0.01793709 0.0004821234
## 47 1 0.0008497534 0.03276797 0.18440397 0.02540308 0.0006947957 0.01778303 0.0004770644
## 48 1 0.0008902151 0.03279810 0.18323798 0.02542870 0.0006926994 0.01767524 0.0004731975
## 49 1 0.0009326033 0.03282889 0.18206251 0.02545510 0.0006864006 0.01763383 0.0004671987
## 50 1 0.0009770100 0.03285878 0.18096998 0.02547978 0.0006807591 0.01756922 0.0004617207
## 51 1 0.0010235310 0.03289140 0.17974716 0.02550598 0.0006752897 0.01754158 0.0004557867
## 52 1 0.0010722672 0.03292430 0.17854335 0.02553380 0.0006704859 0.01754072 0.0004505885
## 53 1 0.0011233240 0.03296029 0.17717564 0.02556356 0.0006655448 0.01755728 0.0004458497
## 54 1 0.0011768120 0.03299728 0.17576951 0.02559499 0.0006611257 0.01754051 0.0004418841
## 55 1 0.0012328467 0.03303672 0.17423923 0.02562847 0.0006572321 0.01761480 0.0004381028
## 56 1 0.0012915497 0.03307535 0.17278254 0.02566136 0.0006550717 0.01772233 0.0004347874
## 57 1 0.0013530478 0.03311624 0.17121533 0.02569607 0.0006525182 0.01792433 0.0004319126
## 58 1 0.0014174742 0.03315617 0.16973972 0.02572815 0.0006519709 0.01810061 0.0004306213
## 59 1 0.0014849683 0.03319890 0.16812391 0.02576203 0.0006514785 0.01833071 0.0004284061
## 60 1 0.0015556761 0.03324158 0.16654687 0.02579542 0.0006521901 0.01844395 0.0004261198
## 61 1 0.0016297508 0.03328685 0.16484952 0.02583098 0.0006525437 0.01860566 0.0004231999
## 62 1 0.0017073526 0.03333220 0.16320084 0.02586724 0.0006541798 0.01873564 0.0004194866
## 63 1 0.0017886495 0.03338000 0.16144753 0.02590568 0.0006559242 0.01896662 0.0004164559
## 64 1 0.0018738174 0.03342712 0.15978295 0.02594193 0.0006587158 0.01915624 0.0004130941
## 65 1 0.0019630407 0.03347856 0.15788606 0.02598108 0.0006622427 0.01938369 0.0004106596
## 66 1 0.0020565123 0.03353399 0.15575811 0.02602244 0.0006666872 0.01963081 0.0004090294
## 67 1 0.0021544347 0.03359369 0.15337352 0.02606660 0.0006685037 0.01996397 0.0004060412
## 68 1 0.0022570197 0.03365683 0.15076893 0.02611286 0.0006701500 0.02012742 0.0004031049
## 69 1 0.0023644894 0.03372304 0.14797064 0.02616167 0.0006699253 0.02038046 0.0003989019
## 70 1 0.0024770764 0.03378617 0.14542029 0.02620831 0.0006712049 0.02028054 0.0003962720
## 71 1 0.0025950242 0.03384906 0.14294419 0.02625508 0.0006696809 0.02042607 0.0003933469
## 72 1 0.0027185882 0.03390323 0.14121072 0.02629583 0.0006718838 0.02019410 0.0003908570
## 73 1 0.0028480359 0.03395991 0.13937395 0.02633826 0.0006714922 0.02007112 0.0003866584
## 74 1 0.0029836472 0.03401841 0.13750608 0.02638257 0.0006717751 0.01996412 0.0003836987
## 75 1 0.0031257158 0.03408236 0.13530288 0.02643130 0.0006722409 0.01984422 0.0003804000
## 76 1 0.0032745492 0.03415226 0.13269500 0.02648415 0.0006729035 0.01972421 0.0003768266
## 77 1 0.0034304693 0.03422882 0.12957397 0.02654150 0.0006737146 0.01959511 0.0003740678
## 78 1 0.0035938137 0.03431265 0.12582703 0.02660394 0.0006746941 0.01945866 0.0003707322
## 79 1 0.0037649358 0.03440441 0.12131819 0.02667220 0.0006758644 0.01931746 0.0003665531
## 80 1 0.0039442061 0.03450485 0.11588703 0.02674552 0.0006772510 0.01917426 0.0003628740
## 81 1 0.0041320124 0.03461439 0.10938613 0.02682428 0.0006788954 0.01904253 0.0003598815
## 82 1 0.0043287613 0.03473277 0.10172545 0.02691068 0.0006812282 0.01881231 0.0003564423
## 83 1 0.0045348785 0.03485016 0.09386179 0.02699699 0.0006826026 0.01919357 0.0003548354
## 84 1 0.0047508102 0.03494206 0.08810816 0.02706372 0.0006881413 0.01794054 0.0003611619
## 85 1 0.0049770236 0.03502160 0.08351759 0.02712016 0.0006841371 0.01834190 0.0003608311
## 86 1 0.0052140083 0.03506365 0.08268736 0.02714938 0.0006933787 0.01766165 0.0003683290
## 87 1 0.0054622772 0.03510491 0.08228434 0.02717880 0.0006980672 0.01755866 0.0003733869
## 88 1 0.0057223677 0.03514622 0.08228409 0.02720772 0.0007035110 0.01755905 0.0003775252
## 89 1 0.0059948425 0.03519151 0.08228409 0.02723899 0.0007092091 0.01755905 0.0003820465
## 90 1 0.0062802914 0.03524115 0.08228409 0.02727347 0.0007151715 0.01755905 0.0003868525
## 91 1 0.0065793322 0.03529556 0.08228409 0.02731186 0.0007214095 0.01755905 0.0003917255
## 92 1 0.0068926121 0.03535518 0.08228409 0.02735411 0.0007279351 0.01755905 0.0003967242
## 93 1 0.0072208090 0.03542051 0.08228409 0.02740023 0.0007347608 0.01755905 0.0004018903
## 94 1 0.0075646333 0.03549207 0.08228409 0.02745106 0.0007418997 0.01755905 0.0004074043
## 95 1 0.0079248290 0.03557046 0.08228409 0.02750742 0.0007493653 0.01755905 0.0004135891
## 96 1 0.0083021757 0.03565629 0.08228409 0.02756928 0.0007571721 0.01755905 0.0004205084
## 97 1 0.0086974900 0.03575027 0.08228409 0.02763929 0.0007653349 0.01755905 0.0004274920
## 98 1 0.0091116276 0.03585313 0.08228409 0.02771663 0.0007738697 0.01755905 0.0004354962
## 99 1 0.0095454846 0.03596570 0.08228409 0.02780066 0.0007827932 0.01755905 0.0004441064
## 100 1 0.0100000000 0.03608884 0.08228409 0.02789191 0.0007921235 0.01755905 0.0004534858
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## [1] "Coefficients"
## model.coef
## (Intercept) 2.096520e+00
## PC1 -9.434734e-04
## PC2 1.163513e-03
## PC3 -2.405772e-04
## PC4 1.273234e-04
## PC5 6.383566e-04
## PC6 3.112195e-05
## PC7 -4.718467e-04
## PC8 -1.663756e-04
## PC9 -1.889095e-04
## PC10 2.190858e-04
## PC11 -1.321923e-03
## PC12 3.080031e-04
## PC13 3.717534e-04
## PC14 1.454215e-03
## PC15 -3.176498e-04
## PC16 7.437512e-04
## PC17 -1.221111e-04
## PC18 3.542595e-04
## PC19 -2.776758e-04
## PC20 8.607844e-04
## PC21 -9.666859e-04
## PC22 3.952461e-03
## PC23 -4.166956e-04
## PC24 -1.331638e-03
## PC25 8.622455e-04
## PC26 -3.465631e-04
## PC27 -2.514303e-04
## PC28 6.568033e-04
## PC29 -6.027372e-04
## PC30 5.293043e-04
## PC31 -2.300414e-04
## PC32 1.197365e-03
## PC33 1.609246e-05
## PC36 4.581326e-04
## PC37 -2.499862e-04
## PC38 3.020890e-04
## PC39 2.102455e-04
## PC40 1.794375e-04
## PC41 -5.773453e-04
## PC42 -4.306599e-04
## PC43 5.851307e-04
## PC44 -3.861284e-04
## PC45 1.097755e-03
## PC46 1.381582e-03
## PC47 4.254507e-04
## PC49 6.504350e-05
## PC50 -7.591232e-04
## PC51 4.131483e-04
## PC52 2.715325e-04
## PC53 -4.125363e-04
## PC55 2.562778e-05
## PC57 2.963589e-04
## PC58 -4.270153e-04
## PC59 1.139268e-03
## PC60 -6.675211e-04
## PC61 -1.468285e-04
## PC62 -4.333352e-04
## PC63 5.431812e-04
## PC64 6.411887e-05
## PC65 7.599744e-04
## PC66 4.984856e-05
## PC67 2.625191e-04
## PC68 7.676511e-04
## PC69 -1.125275e-03
## PC71 -6.910116e-04
## PC72 7.123027e-04
## PC73 4.641420e-04
## PC75 1.261676e-04
## PC76 1.467970e-03
## PC78 -2.470587e-04
## PC79 6.842701e-04
## PC80 -2.375074e-04
## PC81 -4.368029e-04
## PC82 -9.842284e-04
## PC83 1.049882e-03
## PC85 2.695494e-04
## PC86 4.221395e-04
## PC87 4.448277e-04
## PC88 2.953262e-04
## PC89 1.261681e-04
## PC90 -2.230788e-03
## PC91 2.686513e-04
## PC92 1.551006e-04
## PC93 -2.914432e-05
## PC94 4.116545e-04
## PC95 1.146995e-04
## PC96 -7.456882e-04
## PC97 -1.571455e-04
## PC98 4.396447e-04
## PC101 -8.547762e-04
## PC102 1.297625e-03
## PC103 1.320225e-05
## PC104 5.103250e-04
## PC105 1.117939e-04
## PC106 8.040508e-04
## PC107 2.590163e-04
## PC108 -1.068187e-03
## PC109 -1.796570e-04
## PC110 -2.752258e-04
## PC111 -6.690987e-04
## PC112 7.819293e-04
## PC114 -1.607113e-04
## PC115 -5.157004e-04
## PC117 -1.544365e-03
## PC118 -8.037471e-04
## PC119 -6.830647e-04
## PC120 -3.994250e-04
## PC121 2.625639e-04
## PC123 -5.934911e-04
## PC126 8.771437e-04
## PC127 9.211607e-04
## PC128 -3.131555e-04
## PC129 -6.028241e-04
## PC131 1.008670e-03
## PC132 1.364081e-03
## PC133 -3.482159e-04
## PC134 -1.411284e-04
## PC136 4.925246e-04
## PC138 9.809952e-04
# Evaluate the tuned glmnet/LASSO model on the held-out test set.
# isTRUE() is safer than `== TRUE`: if the flag were NA, `NA == TRUE`
# evaluates to NA and the `if` would error, whereas isTRUE(NA) is FALSE.
if (isTRUE(algo.LASSO.caret)) {
  # NOTE(review): `t` is presumably a transformation object defined earlier
  # in the script; if it were undefined this would resolve to base::t
  # (matrix transpose) — confirm against the full source.
  test.model(model.LASSO.caret, data.test
             ,method = 'glmnet', subopt = "LASSO"
             ,formula = formula, feature.names = feature.names
             ,label.names = label.names
             ,draw.limits = TRUE, transformation = t)
}
## [1] "Summary of predicted values: "
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.037 2.086 2.098 2.097 2.109 2.154
## [1] "glmnet LASSO Test MSE: 0.00102589494768998"
# Train a Least Angle Regression (caret method "lars") model with
# cross-validated tuning over the `fraction` parameter.
# isTRUE() is safer than `== TRUE`: if the flag were NA, `NA == TRUE`
# evaluates to NA and the `if` would error, whereas isTRUE(NA) is FALSE.
if (isTRUE(algo.LARS.caret)) {
  set.seed(1)  # reproducible CV fold assignment
  # NOTE(review): subopt is passed as the *string* 'NULL' here, but the
  # matching test.model() call for this model passes the NULL object —
  # confirm which form train.caret.glmselect() actually expects.
  returned = train.caret.glmselect(formula = formula
                                   ,data = data.train
                                   ,method = "lars"
                                   ,subopt = 'NULL'
                                   ,feature.names = feature.names)
  model.LARS.caret = returned$model
}
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo, : There were missing values in resampled
## performance measures.
## Aggregating results
## Selecting tuning parameters
## Fitting fraction = 0.848 on full training set
## Least Angle Regression
##
## 5584 samples
## 138 predictor
##
## Pre-processing: centered (138), scaled (138)
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 5026, 5026, 5026, 5025, 5025, 5026, ...
## Resampling results across tuning parameters:
##
## fraction RMSE Rsquared MAE
## 0.00000000 0.03617859 NaN 0.02795891
## 0.01010101 0.03583652 0.08228409 0.02770456
## 0.02020202 0.03553677 0.08228409 0.02748340
## 0.03030303 0.03528042 0.08228409 0.02730132
## 0.04040404 0.03506951 0.08236334 0.02715343
## 0.05050505 0.03491192 0.08960292 0.02704117
## 0.06060606 0.03475285 0.10045185 0.02692344
## 0.07070707 0.03459739 0.11057146 0.02681071
## 0.08080808 0.03445064 0.11901463 0.02670491
## 0.09090909 0.03431271 0.12593822 0.02660274
## 0.10101010 0.03418370 0.13154481 0.02650672
## 0.11111111 0.03406372 0.13604287 0.02641613
## 0.12121212 0.03395489 0.13959693 0.02633448
## 0.13131313 0.03386210 0.14245698 0.02626525
## 0.14141414 0.03377608 0.14568772 0.02620197
## 0.15151515 0.03369163 0.14931545 0.02613907
## 0.16161616 0.03360996 0.15274363 0.02607883
## 0.17171717 0.03353216 0.15589374 0.02602156
## 0.18181818 0.03345971 0.15865386 0.02596743
## 0.19191919 0.03339432 0.16099539 0.02591723
## 0.20202020 0.03333573 0.16305384 0.02586990
## 0.21212121 0.03328017 0.16513137 0.02582496
## 0.22222222 0.03323077 0.16698990 0.02578614
## 0.23232323 0.03318498 0.16871462 0.02575066
## 0.24242424 0.03314283 0.17029447 0.02571768
## 0.25252525 0.03310373 0.17175409 0.02568550
## 0.26262626 0.03306739 0.17311945 0.02565402
## 0.27272727 0.03303296 0.17443936 0.02562406
## 0.28282828 0.03300016 0.17572874 0.02559644
## 0.29292929 0.03297050 0.17686835 0.02557202
## 0.30303030 0.03294296 0.17790387 0.02554897
## 0.31313131 0.03291665 0.17888121 0.02552665
## 0.32323232 0.03289223 0.17977739 0.02550648
## 0.33333333 0.03286856 0.18065191 0.02548749
## 0.34343434 0.03284689 0.18144312 0.02547007
## 0.35353535 0.03282641 0.18219490 0.02545282
## 0.36363636 0.03280610 0.18295531 0.02543552
## 0.37373737 0.03278637 0.18370210 0.02541827
## 0.38383838 0.03276658 0.18447601 0.02540150
## 0.39393939 0.03274728 0.18524226 0.02538516
## 0.40404040 0.03272834 0.18599698 0.02536882
## 0.41414141 0.03271014 0.18671707 0.02535287
## 0.42424242 0.03269230 0.18742583 0.02533727
## 0.43434343 0.03267488 0.18811888 0.02532154
## 0.44444444 0.03265732 0.18882764 0.02530588
## 0.45454545 0.03264038 0.18950682 0.02529110
## 0.46464646 0.03262403 0.19016099 0.02527702
## 0.47474747 0.03260805 0.19080251 0.02526330
## 0.48484848 0.03259262 0.19142469 0.02525033
## 0.49494949 0.03257790 0.19201249 0.02523795
## 0.50505051 0.03256399 0.19256197 0.02522640
## 0.51515152 0.03255102 0.19306559 0.02521570
## 0.52525253 0.03253882 0.19353232 0.02520528
## 0.53535354 0.03252713 0.19397885 0.02519519
## 0.54545455 0.03251577 0.19441402 0.02518549
## 0.55555556 0.03250476 0.19483713 0.02517619
## 0.56565657 0.03249417 0.19524482 0.02516728
## 0.57575758 0.03248415 0.19562789 0.02515872
## 0.58585859 0.03247479 0.19598221 0.02515071
## 0.59595960 0.03246614 0.19630629 0.02514381
## 0.60606061 0.03245776 0.19662319 0.02513753
## 0.61616162 0.03244964 0.19693214 0.02513143
## 0.62626263 0.03244200 0.19722417 0.02512566
## 0.63636364 0.03243467 0.19750586 0.02512017
## 0.64646465 0.03242790 0.19776511 0.02511505
## 0.65656566 0.03242148 0.19801191 0.02511029
## 0.66666667 0.03241550 0.19824327 0.02510545
## 0.67676768 0.03240951 0.19848078 0.02510054
## 0.68686869 0.03240376 0.19871261 0.02509564
## 0.69696970 0.03239833 0.19893331 0.02509088
## 0.70707071 0.03239308 0.19915097 0.02508617
## 0.71717172 0.03238818 0.19935621 0.02508168
## 0.72727273 0.03238351 0.19955636 0.02507737
## 0.73737374 0.03237932 0.19973857 0.02507352
## 0.74747475 0.03237516 0.19992662 0.02506993
## 0.75757576 0.03237120 0.20010944 0.02506657
## 0.76767677 0.03236760 0.20028041 0.02506361
## 0.77777778 0.03236453 0.20043147 0.02506089
## 0.78787879 0.03236202 0.20056203 0.02505858
## 0.79797980 0.03236000 0.20067437 0.02505660
## 0.80808081 0.03235849 0.20076757 0.02505498
## 0.81818182 0.03235747 0.20084308 0.02505367
## 0.82828283 0.03235661 0.20091785 0.02505232
## 0.83838384 0.03235611 0.20098162 0.02505121
## 0.84848485 0.03235594 0.20103537 0.02505029
## 0.85858586 0.03235616 0.20107640 0.02504957
## 0.86868687 0.03235695 0.20109733 0.02504941
## 0.87878788 0.03235820 0.20110218 0.02504976
## 0.88888889 0.03235998 0.20108877 0.02505058
## 0.89898990 0.03236227 0.20105801 0.02505182
## 0.90909091 0.03236501 0.20101345 0.02505317
## 0.91919192 0.03236821 0.20095389 0.02505474
## 0.92929293 0.03237179 0.20088332 0.02505645
## 0.93939394 0.03237574 0.20080267 0.02505836
## 0.94949495 0.03238005 0.20071203 0.02506049
## 0.95959596 0.03238484 0.20060560 0.02506312
## 0.96969697 0.03239017 0.20048122 0.02506628
## 0.97979798 0.03239589 0.20034591 0.02506967
## 0.98989899 0.03240177 0.20021106 0.02507302
## 1.00000000 0.03240803 0.20006564 0.02507649
##
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was fraction = 0.8484848.
## fraction
## 85 0.8484848
## Warning: Removed 1 rows containing missing values (geom_point).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## [1] "Coefficients"
## PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8
## -4.410256e-03 4.928519e-03 -9.215452e-04 4.803960e-04 2.385848e-03 1.111594e-04 -1.723726e-03 -6.066736e-04
## PC9 PC10 PC11 PC12 PC13 PC14 PC15 PC16
## -6.595839e-04 7.540574e-04 -4.476848e-03 1.040885e-03 1.216707e-03 4.802392e-03 -1.020253e-03 2.341349e-03
## PC17 PC18 PC19 PC20 PC21 PC22 PC23 PC24
## -3.683588e-04 1.049358e-03 -7.984739e-04 2.371517e-03 -2.600206e-03 1.021933e-02 -5.320689e-04 -1.537817e-03
## PC25 PC26 PC27 PC28 PC29 PC30 PC31 PC32
## 1.003121e-03 -3.971241e-04 -2.859889e-04 7.502361e-04 -6.881687e-04 6.032317e-04 -2.584945e-04 1.371130e-03
## PC33 PC36 PC37 PC38 PC39 PC40 PC41 PC42
## 1.401775e-05 5.156979e-04 -2.793512e-04 3.350076e-04 2.308271e-04 1.976635e-04 -6.501462e-04 -4.799941e-04
## PC43 PC44 PC45 PC46 PC47 PC49 PC50 PC51
## 6.506624e-04 -4.295708e-04 1.227854e-03 1.538914e-03 4.690743e-04 6.791953e-05 -8.384836e-04 4.587936e-04
## PC52 PC53 PC55 PC57 PC58 PC59 PC60 PC61
## 2.978809e-04 -4.495925e-04 2.339435e-05 3.223448e-04 -4.643092e-04 1.239946e-03 -7.222865e-04 -1.536785e-04
## PC62 PC63 PC64 PC65 PC66 PC67 PC68 PC69
## -4.696496e-04 5.845760e-04 6.484707e-05 8.160352e-04 4.954148e-05 2.784324e-04 8.247424e-04 -1.208095e-03
## PC71 PC72 PC73 PC75 PC76 PC78 PC79 PC80
## -7.378094e-04 7.642804e-04 4.961340e-04 1.298233e-04 1.570179e-03 -2.580188e-04 7.264497e-04 -2.482595e-04
## PC81 PC82 PC83 PC85 PC86 PC87 PC88 PC89
## -4.565037e-04 -1.043217e-03 1.112338e-03 2.800245e-04 4.374583e-04 4.637188e-04 3.059287e-04 1.285456e-04
## PC90 PC91 PC92 PC93 PC94 PC95 PC96 PC97
## -2.340728e-03 2.780811e-04 1.582541e-04 -2.611301e-05 4.255402e-04 1.155077e-04 -7.699293e-04 -1.587765e-04
## PC98 PC101 PC102 PC103 PC104 PC105 PC106 PC107
## 4.497164e-04 -8.835800e-04 1.331258e-03 8.767323e-06 5.217942e-04 1.096582e-04 8.191342e-04 2.616697e-04
## PC108 PC109 PC110 PC111 PC112 PC114 PC115 PC117
## -1.090502e-03 -1.794619e-04 -2.778376e-04 -6.796458e-04 7.950152e-04 -1.589168e-04 -5.214930e-04 -1.563291e-03
## PC118 PC119 PC120 PC121 PC123 PC126 PC127 PC128
## -8.096809e-04 -6.824433e-04 -3.983990e-04 2.589110e-04 -5.919343e-04 8.740385e-04 9.263851e-04 -3.064481e-04
## PC129 PC131 PC132 PC133 PC134 PC136 PC138
## -5.978469e-04 9.995790e-04 1.359012e-03 -3.413935e-04 -1.358413e-04 4.816001e-04 9.585577e-04
# Evaluate the tuned LARS model on the held-out test set.
# isTRUE() is safer than `== TRUE`: if the flag were NA, `NA == TRUE`
# evaluates to NA and the `if` would error, whereas isTRUE(NA) is FALSE.
if (isTRUE(algo.LARS.caret)) {
  # NOTE(review): `t` is presumably a transformation object defined earlier
  # in the script — confirm against the full source.
  test.model(model.LARS.caret, data.test
             ,method = 'lars', subopt = NULL
             ,formula = formula, feature.names = feature.names
             ,label.names = label.names
             ,draw.limits = TRUE, transformation = t)
}
## [1] "Summary of predicted values: "
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.037 2.086 2.098 2.097 2.109 2.154
## [1] "lars Test MSE: 0.00102574845474681"
sessionInfo()
## R version 3.5.1 (2018-07-02)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 17134)
##
## Matrix products: default
##
## locale:
## [1] LC_COLLATE=English_United States.1252 LC_CTYPE=English_United States.1252 LC_MONETARY=English_United States.1252
## [4] LC_NUMERIC=C LC_TIME=English_United States.1252
##
## attached base packages:
## [1] parallel stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] bindrcpp_0.2.2 knitr_1.20 htmltools_0.3.6 reshape2_1.4.3
## [5] lars_1.2 doParallel_1.0.14 iterators_1.0.10 caret_6.0-81
## [9] leaps_3.0 ggforce_0.1.3 rlist_0.4.6.1 car_3.0-2
## [13] carData_3.0-2 bestNormalize_1.3.0 scales_1.0.0 onewaytests_2.0
## [17] caTools_1.17.1.1 mosaic_1.5.0 mosaicData_0.17.0 ggformula_0.9.1
## [21] ggstance_0.3.1 lattice_0.20-35 DT_0.5 ggiraph_0.6.0
## [25] investr_1.4.0 glmnet_2.0-16 foreach_1.4.4 Matrix_1.2-14
## [29] MASS_7.3-50 PerformanceAnalytics_1.5.2 xts_0.11-2 zoo_1.8-4
## [33] forcats_0.3.0 stringr_1.3.1 dplyr_0.7.8 purrr_0.2.5
## [37] readr_1.3.1 tidyr_0.8.2 tibble_1.4.2 ggplot2_3.1.0
## [41] tidyverse_1.2.1 usdm_1.1-18 raster_2.8-4 sp_1.3-1
## [45] pacman_0.5.0
##
## loaded via a namespace (and not attached):
## [1] readxl_1.2.0 backports_1.1.3 plyr_1.8.4 lazyeval_0.2.1 splines_3.5.1 mycor_0.1.1
## [7] crosstalk_1.0.0 leaflet_2.0.2 digest_0.6.18 magrittr_1.5 mosaicCore_0.6.0 openxlsx_4.1.0
## [13] recipes_0.1.4 modelr_0.1.2 gower_0.1.2 colorspace_1.3-2 rvest_0.3.2 ggrepel_0.8.0
## [19] haven_2.0.0 crayon_1.3.4 jsonlite_1.5 bindr_0.1.1 survival_2.42-3 glue_1.3.0
## [25] registry_0.5 gtable_0.2.0 ppcor_1.1 ipred_0.9-8 abind_1.4-5 rngtools_1.3.1
## [31] bibtex_0.4.2 Rcpp_1.0.0 xtable_1.8-3 units_0.6-2 foreign_0.8-70 stats4_3.5.1
## [37] lava_1.6.4 prodlim_2018.04.18 htmlwidgets_1.3 httr_1.4.0 RColorBrewer_1.1-2 pkgconfig_2.0.2
## [43] farver_1.1.0 nnet_7.3-12 labeling_0.3 tidyselect_0.2.5 rlang_0.3.1 later_0.7.5
## [49] munsell_0.5.0 cellranger_1.1.0 tools_3.5.1 cli_1.0.1 generics_0.0.2 moments_0.14
## [55] sjlabelled_1.0.17 broom_0.5.1 evaluate_0.12 ggdendro_0.1-20 yaml_2.2.0 ModelMetrics_1.2.2
## [61] zip_2.0.1 nlme_3.1-137 doRNG_1.7.1 mime_0.6 xml2_1.2.0 compiler_3.5.1
## [67] rstudioapi_0.8 curl_3.2 tweenr_1.0.1 stringi_1.2.4 gdtools_0.1.7 pillar_1.3.1
## [73] data.table_1.11.8 bitops_1.0-6 insight_0.1.2 httpuv_1.4.5 R6_2.3.0 promises_1.0.1
## [79] gridExtra_2.3 rio_0.5.16 codetools_0.2-15 assertthat_0.2.0 pkgmaker_0.27 withr_2.1.2
## [85] nortest_1.0-4 mgcv_1.8-24 hms_0.4.2 quadprog_1.5-5 grid_3.5.1 rpart_4.1-13
## [91] timeDate_3043.102 class_7.3-14 rmarkdown_1.11 shiny_1.2.0 lubridate_1.7.4